/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/kernel.h>
#include <linux/firmware.h>

#include "amdgpu_gfx.h"
#include "vi_structs.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"
54 #define GFX8_NUM_GFX_RINGS 1
55 #define GFX8_MEC_HPD_SIZE 2048
57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
62 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
63 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
64 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
65 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
66 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
67 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
68 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
69 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
70 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
77 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L
80 #define SET_BPM_SERDES_CMD 1
81 #define CLE_BPM_SERDES_CMD 0
83 /* BPM Register Address*/
85 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
86 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
87 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
88 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
89 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
93 #define RLC_FormatDirectRegListLength 14
95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
128 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
129 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
130 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
131 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
141 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
143 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
145 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
152 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
153 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
155 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
164 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
165 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
166 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
167 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
168 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
169 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
171 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
173 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
174 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
175 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
176 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
177 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
178 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
179 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
180 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
181 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
182 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
183 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
184 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
185 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
186 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
187 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
188 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
191 static const u32 golden_settings_tonga_a11[] =
193 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
194 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
195 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
196 mmGB_GPU_ID, 0x0000000f, 0x00000000,
197 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
198 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
199 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
200 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
201 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
202 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
203 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
204 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
205 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
206 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
207 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
208 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
211 static const u32 tonga_golden_common_all[] =
213 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
214 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
215 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
216 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
217 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
218 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
219 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
220 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
223 static const u32 tonga_mgcg_cgcg_init[] =
225 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
226 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
227 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
228 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
229 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
230 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
231 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
232 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
233 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
234 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
235 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
236 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
237 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
238 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
239 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
240 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
241 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
242 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
243 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
244 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
245 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
246 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
247 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
248 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
249 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
250 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
251 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
252 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
253 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
254 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
255 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
256 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
257 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
258 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
259 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
260 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
261 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
262 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
263 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
264 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
265 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
266 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
267 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
268 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
269 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
270 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
271 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
272 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
273 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
274 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
275 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
276 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
277 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
278 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
279 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
280 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
281 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
282 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
283 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
284 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
285 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
286 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
287 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
288 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
289 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
290 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
291 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
292 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
293 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
294 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
295 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
296 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
297 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
298 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
299 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
302 static const u32 golden_settings_vegam_a11[] =
304 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
305 mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
306 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
307 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
308 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
309 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
310 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
311 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
312 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
313 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
314 mmSQ_CONFIG, 0x07f80000, 0x01180000,
315 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
316 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
317 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
318 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
319 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
320 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
323 static const u32 vegam_golden_common_all[] =
325 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
326 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
327 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
328 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
329 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
330 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
333 static const u32 golden_settings_polaris11_a11[] =
335 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
336 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
337 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
338 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
339 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
340 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
341 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
342 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
343 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
344 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
345 mmSQ_CONFIG, 0x07f80000, 0x01180000,
346 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
347 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
348 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
349 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
350 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
351 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
354 static const u32 polaris11_golden_common_all[] =
356 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
357 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
358 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
359 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
360 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
361 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
364 static const u32 golden_settings_polaris10_a11[] =
366 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
367 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
368 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
369 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
370 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
371 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
372 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
373 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
374 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
375 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
376 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
377 mmSQ_CONFIG, 0x07f80000, 0x07180000,
378 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
379 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
380 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
381 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
382 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
385 static const u32 polaris10_golden_common_all[] =
387 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
388 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
389 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
390 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
391 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
392 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
393 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
394 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
397 static const u32 fiji_golden_common_all[] =
399 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
400 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
401 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
402 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
403 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
404 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
405 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
406 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
407 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
408 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
411 static const u32 golden_settings_fiji_a10[] =
413 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
414 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
415 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
416 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
417 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
418 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
419 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
420 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
421 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
422 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
423 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
426 static const u32 fiji_mgcg_cgcg_init[] =
428 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
429 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
430 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
431 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
432 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
433 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
434 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
435 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
436 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
437 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
438 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
439 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
440 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
441 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
442 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
443 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
444 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
445 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
446 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
447 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
448 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
449 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
450 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
451 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
452 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
453 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
454 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
455 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
456 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
457 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
458 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
459 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
460 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
461 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
462 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
465 static const u32 golden_settings_iceland_a11[] =
467 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
468 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
469 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
470 mmGB_GPU_ID, 0x0000000f, 0x00000000,
471 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
472 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
473 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
474 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
475 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
476 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
477 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
478 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
479 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
480 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
481 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
482 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
485 static const u32 iceland_golden_common_all[] =
487 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
488 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
489 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
490 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
491 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
492 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
493 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
494 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
497 static const u32 iceland_mgcg_cgcg_init[] =
499 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
500 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
501 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
502 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
503 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
504 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
505 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
506 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
507 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
508 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
509 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
510 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
511 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
512 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
513 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
514 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
515 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
516 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
517 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
518 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
519 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
520 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
521 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
522 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
523 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
524 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
525 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
526 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
527 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
528 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
529 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
530 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
531 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
532 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
533 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
534 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
535 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
536 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
537 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
538 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
539 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
540 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
541 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
542 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
543 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
544 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
545 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
546 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
547 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
548 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
549 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
550 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
551 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
552 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
553 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
554 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
555 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
558 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
561 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
562 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
565 static const u32 cz_golden_settings_a11[] =
567 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
568 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
569 mmGB_GPU_ID, 0x0000000f, 0x00000000,
570 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
571 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
572 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
573 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
574 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
575 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
576 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
577 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
578 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
581 static const u32 cz_golden_common_all[] =
583 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
584 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
585 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
586 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
587 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
588 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
589 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
590 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
593 static const u32 cz_mgcg_cgcg_init[] =
595 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
596 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
597 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
598 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
599 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
600 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
601 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
602 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
603 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
604 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
605 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
606 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
607 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
608 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
609 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
610 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
611 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
612 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
613 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
614 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
615 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
616 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
617 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
618 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
619 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
620 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
621 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
622 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
623 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
624 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
625 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
626 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
627 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
628 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
629 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
630 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
631 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
632 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
633 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
634 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
635 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
636 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
637 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
638 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
639 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
640 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
641 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
642 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
643 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
644 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
645 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
646 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
647 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
648 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
649 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
650 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
651 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
652 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
653 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
654 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
655 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
656 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
657 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
658 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
659 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
660 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
661 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
662 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
663 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
664 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
665 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
666 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
667 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
668 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
669 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
672 static const u32 stoney_golden_settings_a11[] =
674 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
675 mmGB_GPU_ID, 0x0000000f, 0x00000000,
676 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
677 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
678 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
679 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
680 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
681 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
682 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
683 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
686 static const u32 stoney_golden_common_all[] =
688 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
689 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
690 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
691 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
692 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
693 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
694 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
695 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
698 static const u32 stoney_mgcg_cgcg_init[] =
700 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
701 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
702 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
703 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
704 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
708 static const char * const sq_edc_source_names[] = {
709 "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
710 "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
711 "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
712 "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
713 "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
714 "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
715 "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
718 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
719 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
720 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
721 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
722 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
723 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
724 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
725 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values.
 *
 * Applies the clock-gating init sequence, the per-revision golden settings
 * and the common golden registers for the detected ASIC via
 * amdgpu_device_program_register_sequence().  For one group of Polaris10
 * boards it additionally performs board-specific I2C transactions.
 */
727 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
729 switch (adev->asic_type) {
	/* Iceland (Topaz) sequences */
731 amdgpu_device_program_register_sequence(adev,
732 iceland_mgcg_cgcg_init,
733 ARRAY_SIZE(iceland_mgcg_cgcg_init));
734 amdgpu_device_program_register_sequence(adev,
735 golden_settings_iceland_a11,
736 ARRAY_SIZE(golden_settings_iceland_a11));
737 amdgpu_device_program_register_sequence(adev,
738 iceland_golden_common_all,
739 ARRAY_SIZE(iceland_golden_common_all));
	/* Fiji sequences */
742 amdgpu_device_program_register_sequence(adev,
744 ARRAY_SIZE(fiji_mgcg_cgcg_init));
745 amdgpu_device_program_register_sequence(adev,
746 golden_settings_fiji_a10,
747 ARRAY_SIZE(golden_settings_fiji_a10));
748 amdgpu_device_program_register_sequence(adev,
749 fiji_golden_common_all,
750 ARRAY_SIZE(fiji_golden_common_all));
	/* Tonga sequences */
754 amdgpu_device_program_register_sequence(adev,
755 tonga_mgcg_cgcg_init,
756 ARRAY_SIZE(tonga_mgcg_cgcg_init));
757 amdgpu_device_program_register_sequence(adev,
758 golden_settings_tonga_a11,
759 ARRAY_SIZE(golden_settings_tonga_a11));
760 amdgpu_device_program_register_sequence(adev,
761 tonga_golden_common_all,
762 ARRAY_SIZE(tonga_golden_common_all));
	/* VegaM sequences */
765 amdgpu_device_program_register_sequence(adev,
766 golden_settings_vegam_a11,
767 ARRAY_SIZE(golden_settings_vegam_a11));
768 amdgpu_device_program_register_sequence(adev,
769 vegam_golden_common_all,
770 ARRAY_SIZE(vegam_golden_common_all));
	/* Polaris11 sequences */
774 amdgpu_device_program_register_sequence(adev,
775 golden_settings_polaris11_a11,
776 ARRAY_SIZE(golden_settings_polaris11_a11));
777 amdgpu_device_program_register_sequence(adev,
778 polaris11_golden_common_all,
779 ARRAY_SIZE(polaris11_golden_common_all));
	/* Polaris10 sequences */
782 amdgpu_device_program_register_sequence(adev,
783 golden_settings_polaris10_a11,
784 ARRAY_SIZE(golden_settings_polaris10_a11));
785 amdgpu_device_program_register_sequence(adev,
786 polaris10_golden_common_all,
787 ARRAY_SIZE(polaris10_golden_common_all));
788 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
	/*
	 * Board-specific workaround: on revision 0xc7 Polaris10 boards from
	 * the listed subsystem vendors/devices, issue two I2C transactions
	 * via the atombios I2C channel (presumably tuning an on-board
	 * component; exact target is board-specific — see git history).
	 */
789 if (adev->pdev->revision == 0xc7 &&
790 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
791 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
792 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
793 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
794 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
	/* Carrizo sequences */
798 amdgpu_device_program_register_sequence(adev,
800 ARRAY_SIZE(cz_mgcg_cgcg_init));
801 amdgpu_device_program_register_sequence(adev,
802 cz_golden_settings_a11,
803 ARRAY_SIZE(cz_golden_settings_a11));
804 amdgpu_device_program_register_sequence(adev,
805 cz_golden_common_all,
806 ARRAY_SIZE(cz_golden_common_all));
	/* Stoney sequences */
809 amdgpu_device_program_register_sequence(adev,
810 stoney_mgcg_cgcg_init,
811 ARRAY_SIZE(stoney_mgcg_cgcg_init));
812 amdgpu_device_program_register_sequence(adev,
813 stoney_golden_settings_a11,
814 ARRAY_SIZE(stoney_golden_settings_a11));
815 amdgpu_device_program_register_sequence(adev,
816 stoney_golden_common_all,
817 ARRAY_SIZE(stoney_golden_common_all));
/*
 * gfx_v8_0_scratch_init - set up the CP scratch register allocator.
 *
 * Eight scratch registers starting at mmSCRATCH_REG0; free_mask is a
 * bitmap with one bit per register, all initially free.
 */
824 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
826 adev->gfx.scratch.num_reg = 8;
827 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
828 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
/*
 * gfx_v8_0_ring_test_ring - basic ring-level smoke test.
 *
 * Writes 0xCAFEDEAD to a scratch register, submits a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to the same register,
 * then polls (up to adev->usec_timeout iterations) until the value
 * appears.  Returns 0 on success, negative errno on failure.
 */
831 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
833 struct amdgpu_device *adev = ring->adev;
	/* Grab a scratch register to use as the test target. */
839 r = amdgpu_gfx_scratch_get(adev, &scratch);
841 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
	/* Seed with a sentinel so we can detect the CP write. */
844 WREG32(scratch, 0xCAFEDEAD);
845 r = amdgpu_ring_alloc(ring, 3);
847 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
	/* Ring allocation failed: release the scratch reg before bailing. */
849 amdgpu_gfx_scratch_free(adev, scratch);
	/* 3-dword packet: write 0xDEADBEEF to the scratch register. */
852 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
853 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
854 amdgpu_ring_write(ring, 0xDEADBEEF);
855 amdgpu_ring_commit(ring);
	/* Busy-poll for the CP write, ~1us per iteration. */
857 for (i = 0; i < adev->usec_timeout; i++) {
858 tmp = RREG32(scratch);
859 if (tmp == 0xDEADBEEF)
863 if (i < adev->usec_timeout) {
864 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
867 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
868 ring->idx, scratch, tmp);
871 amdgpu_gfx_scratch_free(adev, scratch);
/*
 * gfx_v8_0_ring_test_ib - indirect-buffer smoke test.
 *
 * Allocates a writeback (WB) slot, seeds it with 0xCAFEDEAD, then
 * submits a small IB containing a WRITE_DATA packet that stores
 * 0xDEADBEEF into the slot.  Waits on the submission fence (bounded by
 * @timeout) and verifies the value landed.  Returns 0 on success,
 * negative errno / -ETIMEDOUT-style failure otherwise.
 */
875 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
877 struct amdgpu_device *adev = ring->adev;
879 struct dma_fence *f = NULL;
	/* WB slot used as the memory destination for the test write. */
886 r = amdgpu_device_wb_get(adev, &index);
888 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
892 gpu_addr = adev->wb.gpu_addr + (index * 4);
893 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
894 memset(&ib, 0, sizeof(ib));
895 r = amdgpu_ib_get(adev, NULL, 16, &ib);
897 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
	/* WRITE_DATA: dst_sel=5 (memory), confirmed write of 0xDEADBEEF. */
900 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
901 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
902 ib.ptr[2] = lower_32_bits(gpu_addr);
903 ib.ptr[3] = upper_32_bits(gpu_addr);
904 ib.ptr[4] = 0xDEADBEEF;
907 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	/* Wait for completion; 0 means the fence timed out. */
911 r = dma_fence_wait_timeout(f, false, timeout);
913 DRM_ERROR("amdgpu: IB test timed out.\n");
917 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
921 tmp = adev->wb.wb[index];
922 if (tmp == 0xDEADBEEF) {
923 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
926 DRM_ERROR("ib test on ring %d failed\n", ring->idx);
	/* Common cleanup: free the IB and the WB slot. */
931 amdgpu_ib_free(adev, &ib, NULL);
934 amdgpu_device_wb_free(adev, index);
/*
 * gfx_v8_0_free_microcode - release all GFX firmware images.
 *
 * release_firmware() tolerates NULL, so each pointer is released
 * unconditionally and then cleared.  MEC2 firmware only exists on ASICs
 * other than Stoney/Topaz, hence the conditional release; the pointer is
 * still cleared unconditionally (it is NULL on those chips anyway).
 */
939 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
941 release_firmware(adev->gfx.pfp_fw);
942 adev->gfx.pfp_fw = NULL;
943 release_firmware(adev->gfx.me_fw);
944 adev->gfx.me_fw = NULL;
945 release_firmware(adev->gfx.ce_fw);
946 adev->gfx.ce_fw = NULL;
947 release_firmware(adev->gfx.rlc_fw);
948 adev->gfx.rlc_fw = NULL;
949 release_firmware(adev->gfx.mec_fw);
950 adev->gfx.mec_fw = NULL;
951 if ((adev->asic_type != CHIP_STONEY) &&
952 (adev->asic_type != CHIP_TOPAZ))
953 release_firmware(adev->gfx.mec2_fw);
954 adev->gfx.mec2_fw = NULL;
	/* Buffer allocated in gfx_v8_0_init_microcode() for the RLC lists. */
956 kfree(adev->gfx.rlc.register_list_format);
/*
 * gfx_v8_0_init_microcode - request and parse all GFX firmware images.
 *
 * Loads PFP, ME, CE, RLC, MEC (and MEC2 where present) firmware for the
 * detected ASIC.  On Polaris parts a newer "<chip>_<eng>_2.bin" image is
 * tried first, falling back to the plain "<chip>_<eng>.bin" on -ENOENT.
 * Parses version/feature fields from each header, copies the RLC register
 * list tables into a kmalloc'd buffer (freed in gfx_v8_0_free_microcode),
 * and, when firmware is loaded via the SMU, fills the ucode table and
 * accumulates adev->firmware.fw_size.  Returns 0 on success; on error all
 * firmware pointers are released and cleared.
 */
959 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
961 const char *chip_name;
964 struct amdgpu_firmware_info *info = NULL;
965 const struct common_firmware_header *header = NULL;
966 const struct gfx_firmware_header_v1_0 *cp_hdr;
967 const struct rlc_firmware_header_v2_0 *rlc_hdr;
968 unsigned int *tmp = NULL, i;
	/* Pick the firmware file prefix for this ASIC. */
972 switch (adev->asic_type) {
980 chip_name = "carrizo";
986 chip_name = "stoney";
989 chip_name = "polaris10";
992 chip_name = "polaris11";
995 chip_name = "polaris12";
	/* PFP: try the "_2" image on Polaris, fall back to the base image. */
1004 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1005 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
1006 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1007 if (err == -ENOENT) {
1008 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1009 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1012 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1013 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1017 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1020 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1021 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1022 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
	/* ME: same _2/base fallback scheme as PFP. */
1024 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1025 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1026 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1027 if (err == -ENOENT) {
1028 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1029 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1032 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1033 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1037 err = amdgpu_ucode_validate(adev->gfx.me_fw);
1040 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1041 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1043 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
	/* CE: same _2/base fallback scheme. */
1045 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1046 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1047 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1048 if (err == -ENOENT) {
1049 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1050 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1053 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1054 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1058 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1061 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1062 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1063 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1066 * Support for MCBP/Virtualization in combination with chained IBs is
1067 * formal released on feature version #46
1069 if (adev->gfx.ce_feature_version >= 46 &&
1070 adev->gfx.pfp_feature_version >= 46) {
1071 adev->virt.chained_ib_support = true;
1072 DRM_INFO("Chained IB support enabled!\n");
1074 adev->virt.chained_ib_support = false;
	/* RLC: single image, no _2 variant. */
1076 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1077 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1080 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1081 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1082 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1083 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	/* Cache the RLC save/restore layout fields from the v2.0 header. */
1085 adev->gfx.rlc.save_and_restore_offset =
1086 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1087 adev->gfx.rlc.clear_state_descriptor_offset =
1088 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1089 adev->gfx.rlc.avail_scratch_ram_locations =
1090 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1091 adev->gfx.rlc.reg_restore_list_size =
1092 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1093 adev->gfx.rlc.reg_list_format_start =
1094 le32_to_cpu(rlc_hdr->reg_list_format_start);
1095 adev->gfx.rlc.reg_list_format_separate_start =
1096 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1097 adev->gfx.rlc.starting_offsets_start =
1098 le32_to_cpu(rlc_hdr->starting_offsets_start);
1099 adev->gfx.rlc.reg_list_format_size_bytes =
1100 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1101 adev->gfx.rlc.reg_list_size_bytes =
1102 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	/*
	 * One allocation holds both the register-list-format table and the
	 * register-restore list; register_restore points into the tail.
	 * Freed in gfx_v8_0_free_microcode().
	 */
1104 adev->gfx.rlc.register_list_format =
1105 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1106 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1108 if (!adev->gfx.rlc.register_list_format) {
	/* Copy both tables out of the firmware image, LE -> CPU order. */
1113 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1114 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1115 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1116 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1118 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1120 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1121 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1122 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1123 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
	/* MEC: same _2/base fallback scheme. */
1125 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1126 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1127 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1128 if (err == -ENOENT) {
1129 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1130 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1133 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1134 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1138 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1141 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1142 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1143 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
	/* MEC2 exists on everything except Stoney and Topaz. */
1145 if ((adev->asic_type != CHIP_STONEY) &&
1146 (adev->asic_type != CHIP_TOPAZ)) {
1147 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1148 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1149 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1150 if (err == -ENOENT) {
1151 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1152 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1155 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1156 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1159 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1162 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1163 adev->gfx.mec2_fw->data;
1164 adev->gfx.mec2_fw_version =
1165 le32_to_cpu(cp_hdr->header.ucode_version);
1166 adev->gfx.mec2_feature_version =
1167 le32_to_cpu(cp_hdr->ucode_feature_version);
1170 adev->gfx.mec2_fw = NULL;
	/* SMU-managed loading: populate the firmware table and total size. */
1174 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1175 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1176 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1177 info->fw = adev->gfx.pfp_fw;
1178 header = (const struct common_firmware_header *)info->fw->data;
1179 adev->firmware.fw_size +=
1180 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1182 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1183 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1184 info->fw = adev->gfx.me_fw;
1185 header = (const struct common_firmware_header *)info->fw->data;
1186 adev->firmware.fw_size +=
1187 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1189 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1190 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1191 info->fw = adev->gfx.ce_fw;
1192 header = (const struct common_firmware_header *)info->fw->data;
1193 adev->firmware.fw_size +=
1194 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1196 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1197 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1198 info->fw = adev->gfx.rlc_fw;
1199 header = (const struct common_firmware_header *)info->fw->data;
1200 adev->firmware.fw_size +=
1201 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1203 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1204 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1205 info->fw = adev->gfx.mec_fw;
1206 header = (const struct common_firmware_header *)info->fw->data;
1207 adev->firmware.fw_size +=
1208 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1210 /* we need account JT in */
1211 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1212 adev->firmware.fw_size +=
1213 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
	/* SR-IOV guests reserve extra storage backed by the MEC image. */
1215 if (amdgpu_sriov_vf(adev)) {
1216 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1217 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1218 info->fw = adev->gfx.mec_fw;
	/*
	 * NOTE(review): le32_to_cpu() applied to the host constant
	 * 64 * PAGE_SIZE looks unintentional (it is a no-op on LE hosts
	 * but byte-swaps on BE); presumably plain 64 * PAGE_SIZE was
	 * meant — confirm against upstream history before changing.
	 */
1219 adev->firmware.fw_size +=
1220 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1223 if (adev->gfx.mec2_fw) {
1224 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1225 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1226 info->fw = adev->gfx.mec2_fw;
1227 header = (const struct common_firmware_header *)info->fw->data;
1228 adev->firmware.fw_size +=
1229 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	/* Error path: drop every firmware reference acquired above. */
1237 "gfx8: Failed to load firmware \"%s\"\n",
1239 release_firmware(adev->gfx.pfp_fw);
1240 adev->gfx.pfp_fw = NULL;
1241 release_firmware(adev->gfx.me_fw);
1242 adev->gfx.me_fw = NULL;
1243 release_firmware(adev->gfx.ce_fw);
1244 adev->gfx.ce_fw = NULL;
1245 release_firmware(adev->gfx.rlc_fw);
1246 adev->gfx.rlc_fw = NULL;
1247 release_firmware(adev->gfx.mec_fw);
1248 adev->gfx.mec_fw = NULL;
1249 release_firmware(adev->gfx.mec2_fw);
1250 adev->gfx.mec2_fw = NULL;
/*
 * gfx_v8_0_get_csb_buffer - emit the clear-state buffer (CSB) contents.
 *
 * Fills @buffer with PM4 packets that program the clear-state context:
 * preamble begin, context control, every SECT_CONTEXT extent from
 * adev->gfx.rlc.cs_data, the raster config pair, preamble end, and a
 * final CLEAR_STATE packet.  Buffer size must match
 * gfx_v8_0_get_csb_size(); all dwords are stored little-endian.
 */
1255 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1256 volatile u32 *buffer)
1259 const struct cs_section_def *sect = NULL;
1260 const struct cs_extent_def *ext = NULL;
	/* Nothing to emit without clear-state data. */
1262 if (adev->gfx.rlc.cs_data == NULL)
1267 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1268 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1270 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1271 buffer[count++] = cpu_to_le32(0x80000000);
1272 buffer[count++] = cpu_to_le32(0x80000000);
	/* Emit each context-register extent as one SET_CONTEXT_REG packet. */
1274 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1275 for (ext = sect->section; ext->extent != NULL; ++ext) {
1276 if (sect->id == SECT_CONTEXT) {
1278 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1279 buffer[count++] = cpu_to_le32(ext->reg_index -
1280 PACKET3_SET_CONTEXT_REG_START);
1281 for (i = 0; i < ext->reg_count; i++)
1282 buffer[count++] = cpu_to_le32(ext->extent[i]);
	/* PA_SC_RASTER_CONFIG / _1 from the SE0/SH0 RB configuration. */
1289 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1290 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1291 PACKET3_SET_CONTEXT_REG_START);
1292 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1293 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1295 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1296 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1298 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1299 buffer[count++] = cpu_to_le32(0);
/*
 * cz_init_cp_jump_table - copy CP jump tables into the RLC cp_table BO.
 *
 * For Carrizo/Stoney, packs the jump table (jt_offset/jt_size from each
 * gfx firmware header) of CE, PFP, ME, MEC and — when max_me covers it —
 * MEC2, back to back into adev->gfx.rlc.cp_table_ptr.  Caller
 * (gfx_v8_0_rlc_init) has already mapped and reserved the BO.
 */
1302 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1304 const __le32 *fw_data;
1305 volatile u32 *dst_ptr;
1306 int me, i, max_me = 4;
1308 u32 table_offset, table_size;
	/* Carrizo-specific adjustment (sampled-out line follows this check). */
1310 if (adev->asic_type == CHIP_CARRIZO)
1313 /* write the cp table buffer */
1314 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1315 for (me = 0; me < max_me; me++) {
	/* me==0: CE jump table. */
1317 const struct gfx_firmware_header_v1_0 *hdr =
1318 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1319 fw_data = (const __le32 *)
1320 (adev->gfx.ce_fw->data +
1321 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1322 table_offset = le32_to_cpu(hdr->jt_offset);
1323 table_size = le32_to_cpu(hdr->jt_size);
1324 } else if (me == 1) {
	/* me==1: PFP jump table. */
1325 const struct gfx_firmware_header_v1_0 *hdr =
1326 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1327 fw_data = (const __le32 *)
1328 (adev->gfx.pfp_fw->data +
1329 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1330 table_offset = le32_to_cpu(hdr->jt_offset);
1331 table_size = le32_to_cpu(hdr->jt_size);
1332 } else if (me == 2) {
	/* me==2: ME jump table. */
1333 const struct gfx_firmware_header_v1_0 *hdr =
1334 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1335 fw_data = (const __le32 *)
1336 (adev->gfx.me_fw->data +
1337 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1338 table_offset = le32_to_cpu(hdr->jt_offset);
1339 table_size = le32_to_cpu(hdr->jt_size);
1340 } else if (me == 3) {
	/* me==3: MEC jump table. */
1341 const struct gfx_firmware_header_v1_0 *hdr =
1342 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1343 fw_data = (const __le32 *)
1344 (adev->gfx.mec_fw->data +
1345 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1346 table_offset = le32_to_cpu(hdr->jt_offset);
1347 table_size = le32_to_cpu(hdr->jt_size);
1348 } else if (me == 4) {
	/* me==4: MEC2 jump table (only reached if max_me was raised). */
1349 const struct gfx_firmware_header_v1_0 *hdr =
1350 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1351 fw_data = (const __le32 *)
1352 (adev->gfx.mec2_fw->data +
1353 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1354 table_offset = le32_to_cpu(hdr->jt_offset);
1355 table_size = le32_to_cpu(hdr->jt_size);
	/* Copy this engine's table, LE firmware -> LE BO, and advance. */
1358 for (i = 0; i < table_size; i ++) {
1359 dst_ptr[bo_offset + i] =
1360 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1363 bo_offset += table_size;
/*
 * gfx_v8_0_rlc_fini - free the RLC clear-state and cp-table BOs.
 * amdgpu_bo_free_kernel() tolerates NULL/unallocated objects.
 */
1367 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1369 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1370 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
/*
 * gfx_v8_0_rlc_init - allocate and populate RLC buffer objects.
 *
 * Creates the clear-state VRAM BO sized by gfx_v8_0_get_csb_size() and
 * fills it via gfx_v8_0_get_csb_buffer().  On Carrizo/Stoney it also
 * creates the cp-table BO (jump tables + 64K GDS backing) and fills it
 * via cz_init_cp_jump_table().  Returns 0 on success, negative errno on
 * allocation failure (with gfx_v8_0_rlc_fini cleanup on the CSB path).
 */
1373 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1375 volatile u32 *dst_ptr;
1377 const struct cs_section_def *cs_data;
1380 adev->gfx.rlc.cs_data = vi_cs_data;
1382 cs_data = adev->gfx.rlc.cs_data;
1385 /* clear state block */
1386 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1388 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1389 AMDGPU_GEM_DOMAIN_VRAM,
1390 &adev->gfx.rlc.clear_state_obj,
1391 &adev->gfx.rlc.clear_state_gpu_addr,
1392 (void **)&adev->gfx.rlc.cs_ptr);
1394 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1395 gfx_v8_0_rlc_fini(adev);
1399 /* set up the cs buffer */
1400 dst_ptr = adev->gfx.rlc.cs_ptr;
1401 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1402 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1403 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	/* CP table (jump tables + GDS area) only exists on Carrizo/Stoney. */
1406 if ((adev->asic_type == CHIP_CARRIZO) ||
1407 (adev->asic_type == CHIP_STONEY)) {
1408 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1409 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1410 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1411 &adev->gfx.rlc.cp_table_obj,
1412 &adev->gfx.rlc.cp_table_gpu_addr,
1413 (void **)&adev->gfx.rlc.cp_table_ptr);
1415 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1419 cz_init_cp_jump_table(adev);
1421 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1422 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
/* gfx_v8_0_mec_fini - free the MEC HPD EOP buffer object. */
1428 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1430 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
/*
 * gfx_v8_0_mec_init - allocate the MEC HPD (EOP) buffer.
 *
 * Claims the compute queues this driver owns, then creates a zeroed GTT
 * BO with GFX8_MEC_HPD_SIZE bytes per compute ring for the hardware
 * packet descriptors.  Returns 0 on success, negative errno otherwise.
 */
1433 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1437 size_t mec_hpd_size;
1439 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1441 /* take ownership of the relevant compute queues */
1442 amdgpu_gfx_compute_queue_acquire(adev);
1444 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1446 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1447 AMDGPU_GEM_DOMAIN_GTT,
1448 &adev->gfx.mec.hpd_eop_obj,
1449 &adev->gfx.mec.hpd_eop_gpu_addr,
1452 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
	/* Start from a clean HPD area. */
1456 memset(hpd, 0, mec_hpd_size);
1458 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1459 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/*
 * Precompiled GCN compute shader used by the EDC GPR workaround to
 * initialize VGPRs (machine code; see gfx_v8_0_do_edc_gpr_workarounds).
 */
1464 static const u32 vgpr_init_compute_shader[] =
1466 0x7e000209, 0x7e020208,
1467 0x7e040207, 0x7e060206,
1468 0x7e080205, 0x7e0a0204,
1469 0x7e0c0203, 0x7e0e0202,
1470 0x7e100201, 0x7e120200,
1471 0x7e140209, 0x7e160208,
1472 0x7e180207, 0x7e1a0206,
1473 0x7e1c0205, 0x7e1e0204,
1474 0x7e200203, 0x7e220202,
1475 0x7e240201, 0x7e260200,
1476 0x7e280209, 0x7e2a0208,
1477 0x7e2c0207, 0x7e2e0206,
1478 0x7e300205, 0x7e320204,
1479 0x7e340203, 0x7e360202,
1480 0x7e380201, 0x7e3a0200,
1481 0x7e3c0209, 0x7e3e0208,
1482 0x7e400207, 0x7e420206,
1483 0x7e440205, 0x7e460204,
1484 0x7e480203, 0x7e4a0202,
1485 0x7e4c0201, 0x7e4e0200,
1486 0x7e500209, 0x7e520208,
1487 0x7e540207, 0x7e560206,
1488 0x7e580205, 0x7e5a0204,
1489 0x7e5c0203, 0x7e5e0202,
1490 0x7e600201, 0x7e620200,
1491 0x7e640209, 0x7e660208,
1492 0x7e680207, 0x7e6a0206,
1493 0x7e6c0205, 0x7e6e0204,
1494 0x7e700203, 0x7e720202,
1495 0x7e740201, 0x7e760200,
1496 0x7e780209, 0x7e7a0208,
1497 0x7e7c0207, 0x7e7e0206,
1498 0xbf8a0000, 0xbf810000,
/*
 * Precompiled GCN compute shader used by the EDC GPR workaround to
 * initialize SGPRs (machine code; see gfx_v8_0_do_edc_gpr_workarounds).
 */
1501 static const u32 sgpr_init_compute_shader[] =
1503 0xbe8a0100, 0xbe8c0102,
1504 0xbe8e0104, 0xbe900106,
1505 0xbe920108, 0xbe940100,
1506 0xbe960102, 0xbe980104,
1507 0xbe9a0106, 0xbe9c0108,
1508 0xbe9e0100, 0xbea00102,
1509 0xbea20104, 0xbea40106,
1510 0xbea60108, 0xbea80100,
1511 0xbeaa0102, 0xbeac0104,
1512 0xbeae0106, 0xbeb00108,
1513 0xbeb20100, 0xbeb40102,
1514 0xbeb60104, 0xbeb80106,
1515 0xbeba0108, 0xbebc0100,
1516 0xbebe0102, 0xbec00104,
1517 0xbec20106, 0xbec40108,
1518 0xbec60100, 0xbec80102,
1519 0xbee60004, 0xbee70005,
1520 0xbeea0006, 0xbeeb0007,
1521 0xbee80008, 0xbee90009,
1522 0xbefc0000, 0xbf8a0000,
1523 0xbf810000, 0x00000000,
/*
 * Register/value pairs programmed (via SET_SH_REG packets) before the
 * VGPR-init dispatch in gfx_v8_0_do_edc_gpr_workarounds().
 */
1526 static const u32 vgpr_init_regs[] =
1528 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1529 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1530 mmCOMPUTE_NUM_THREAD_X, 256*4,
1531 mmCOMPUTE_NUM_THREAD_Y, 1,
1532 mmCOMPUTE_NUM_THREAD_Z, 1,
1533 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1534 mmCOMPUTE_PGM_RSRC2, 20,
1535 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1536 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1537 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1538 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1539 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1540 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1541 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1542 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1543 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1544 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * Register/value pairs for the first SGPR-init dispatch
 * (SE0 thread mgmt mask 0x0f) in gfx_v8_0_do_edc_gpr_workarounds().
 */
1547 static const u32 sgpr1_init_regs[] =
1549 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1550 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1551 mmCOMPUTE_NUM_THREAD_X, 256*5,
1552 mmCOMPUTE_NUM_THREAD_Y, 1,
1553 mmCOMPUTE_NUM_THREAD_Z, 1,
1554 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1555 mmCOMPUTE_PGM_RSRC2, 20,
1556 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1557 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1558 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1559 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1560 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1561 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1562 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1563 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1564 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1565 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * Register/value pairs for the second SGPR-init dispatch
 * (SE0 thread mgmt mask 0xf0 — the complementary CU set of sgpr1).
 */
1568 static const u32 sgpr2_init_regs[] =
1570 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1571 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1572 mmCOMPUTE_NUM_THREAD_X, 256*5,
1573 mmCOMPUTE_NUM_THREAD_Y, 1,
1574 mmCOMPUTE_NUM_THREAD_Z, 1,
1575 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1576 mmCOMPUTE_PGM_RSRC2, 20,
1577 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1578 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1579 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1580 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1581 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1582 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1583 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1584 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1585 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1586 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * SEC/DED (single-error-correct / double-error-detect) EDC counter
 * registers; read back at the end of the workaround to clear them.
 */
1589 static const u32 sec_ded_counter_registers[] =
1592 mmCPC_EDC_SCRATCH_CNT,
1593 mmCPC_EDC_UCODE_CNT,
1600 mmDC_EDC_CSINVOC_CNT,
1601 mmDC_EDC_RESTORE_CNT,
1607 mmSQC_ATC_EDC_GATCL1_CNT,
1613 mmTCP_ATC_EDC_GATCL1_CNT,
/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPRs for EDC (Carrizo only).
 *
 * Builds one IB containing three compute dispatches — a VGPR-init pass
 * and two SGPR-init passes over complementary CU sets — each preceded by
 * its SET_SH_REG state and followed by a CS partial flush.  After the IB
 * completes, EDC modes are enabled in GB_EDC_MODE / CC_GC_EDC_CONFIG and
 * the SEC/DED counters are read back to clear them.  Returns 0 on
 * success or a negative errno.
 */
1618 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1620 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1621 struct amdgpu_ib ib;
1622 struct dma_fence *f = NULL;
1625 unsigned total_size, vgpr_offset, sgpr_offset;
1628 /* only supported on CZ */
1629 if (adev->asic_type != CHIP_CARRIZO)
1632 /* bail if the compute ring is not ready */
	/* Save GB_EDC_MODE and disable EDC while the init shaders run. */
1636 tmp = RREG32(mmGB_EDC_MODE);
1637 WREG32(mmGB_EDC_MODE, 0);
	/*
	 * IB size: for each dispatch, 3 dwords per reg pair, + 4 (PGM_LO/HI
	 * packet) + 5 (dispatch) + 2 (CS partial flush), then 256-byte
	 * aligned shader code appended after the packets.
	 */
1640 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1642 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1644 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1645 total_size = ALIGN(total_size, 256);
1646 vgpr_offset = total_size;
1647 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1648 sgpr_offset = total_size;
1649 total_size += sizeof(sgpr_init_compute_shader);
1651 /* allocate an indirect buffer to put the commands in */
1652 memset(&ib, 0, sizeof(ib));
1653 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1655 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1659 /* load the compute shaders */
1660 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1661 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1663 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1664 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1666 /* init the ib length to 0 */
	/* --- Dispatch 1: VGPR init --- */
1670 /* write the register state for the compute dispatch */
1671 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1672 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1673 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1674 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1676 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1677 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1678 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1679 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1680 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1681 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1683 /* write dispatch packet */
1684 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1685 ib.ptr[ib.length_dw++] = 8; /* x */
1686 ib.ptr[ib.length_dw++] = 1; /* y */
1687 ib.ptr[ib.length_dw++] = 1; /* z */
1688 ib.ptr[ib.length_dw++] =
1689 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1691 /* write CS partial flush packet */
1692 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1693 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
	/* --- Dispatch 2: SGPR init, first CU set --- */
1696 /* write the register state for the compute dispatch */
1697 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1698 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1699 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1700 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1702 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1703 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1704 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1705 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1706 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1707 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1709 /* write dispatch packet */
1710 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1711 ib.ptr[ib.length_dw++] = 8; /* x */
1712 ib.ptr[ib.length_dw++] = 1; /* y */
1713 ib.ptr[ib.length_dw++] = 1; /* z */
1714 ib.ptr[ib.length_dw++] =
1715 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1717 /* write CS partial flush packet */
1718 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1719 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
	/* --- Dispatch 3: SGPR init, complementary CU set --- */
1722 /* write the register state for the compute dispatch */
1723 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1724 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1725 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1726 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1728 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1729 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1730 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1731 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1732 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1733 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1735 /* write dispatch packet */
1736 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1737 ib.ptr[ib.length_dw++] = 8; /* x */
1738 ib.ptr[ib.length_dw++] = 1; /* y */
1739 ib.ptr[ib.length_dw++] = 1; /* z */
1740 ib.ptr[ib.length_dw++] =
1741 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1743 /* write CS partial flush packet */
1744 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1745 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1747 /* shedule the ib on the ring */
1748 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1750 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1754 /* wait for the GPU to finish processing the IB */
1755 r = dma_fence_wait(f, false);
1757 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
	/* Re-enable EDC: restore saved mode with DED_MODE=2, PROP_FED=1. */
1761 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1762 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1763 WREG32(mmGB_EDC_MODE, tmp);
1765 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1766 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1767 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1770 /* read back registers to clear the counters */
1771 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1772 RREG32(sec_ded_counter_registers[i]);
1775 amdgpu_ib_free(adev, &ib, NULL);
/*
 * gfx_v8_0_gpu_early_init - early GFX8 hardware configuration.
 *
 * Fills adev->gfx.config with the per-ASIC topology limits (shader
 * engines, tile pipes, CUs per SH, render backends, texture channel
 * caches, FIFO sizes) and computes the GB_ADDR_CONFIG value, fixing up
 * its ROW_SIZE field from the detected memory row size.
 */
1781 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1784 u32 mc_shared_chmap, mc_arb_ramcfg;
1785 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
/* Per-ASIC topology limits and golden GB_ADDR_CONFIG selection. */
1789 switch (adev->asic_type) {
/* Small 1-SE/2-pipe part using the TOPAZ golden address config. */
1791 adev->gfx.config.max_shader_engines = 1;
1792 adev->gfx.config.max_tile_pipes = 2;
1793 adev->gfx.config.max_cu_per_sh = 6;
1794 adev->gfx.config.max_sh_per_se = 1;
1795 adev->gfx.config.max_backends_per_se = 2;
1796 adev->gfx.config.max_texture_channel_caches = 2;
1797 adev->gfx.config.max_gprs = 256;
1798 adev->gfx.config.max_gs_threads = 32;
1799 adev->gfx.config.max_hw_contexts = 8;
1801 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1802 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1803 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1804 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1805 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
/* Large 4-SE/16-pipe part using the TONGA golden address config. */
1808 adev->gfx.config.max_shader_engines = 4;
1809 adev->gfx.config.max_tile_pipes = 16;
1810 adev->gfx.config.max_cu_per_sh = 16;
1811 adev->gfx.config.max_sh_per_se = 1;
1812 adev->gfx.config.max_backends_per_se = 4;
1813 adev->gfx.config.max_texture_channel_caches = 16;
1814 adev->gfx.config.max_gprs = 256;
1815 adev->gfx.config.max_gs_threads = 32;
1816 adev->gfx.config.max_hw_contexts = 8;
1818 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1819 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1820 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1821 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1822 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* Polaris: topology limits come from atombios; only the fixed
 * caps and FIFO sizes are set directly here. */
1824 case CHIP_POLARIS11:
1825 case CHIP_POLARIS12:
1826 ret = amdgpu_atombios_get_gfx_info(adev);
1829 adev->gfx.config.max_gprs = 256;
1830 adev->gfx.config.max_gs_threads = 32;
1831 adev->gfx.config.max_hw_contexts = 8;
1833 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1834 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1835 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1836 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1837 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1839 case CHIP_POLARIS10:
1841 ret = amdgpu_atombios_get_gfx_info(adev);
1844 adev->gfx.config.max_gprs = 256;
1845 adev->gfx.config.max_gs_threads = 32;
1846 adev->gfx.config.max_hw_contexts = 8;
1848 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1849 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1850 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1851 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1852 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* 4-SE/8-pipe part, TONGA golden address config. */
1855 adev->gfx.config.max_shader_engines = 4;
1856 adev->gfx.config.max_tile_pipes = 8;
1857 adev->gfx.config.max_cu_per_sh = 8;
1858 adev->gfx.config.max_sh_per_se = 1;
1859 adev->gfx.config.max_backends_per_se = 2;
1860 adev->gfx.config.max_texture_channel_caches = 8;
1861 adev->gfx.config.max_gprs = 256;
1862 adev->gfx.config.max_gs_threads = 32;
1863 adev->gfx.config.max_hw_contexts = 8;
1865 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1866 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1867 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1868 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1869 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* APU config (CARRIZO golden address config), 8 CUs per SH. */
1872 adev->gfx.config.max_shader_engines = 1;
1873 adev->gfx.config.max_tile_pipes = 2;
1874 adev->gfx.config.max_sh_per_se = 1;
1875 adev->gfx.config.max_backends_per_se = 2;
1876 adev->gfx.config.max_cu_per_sh = 8;
1877 adev->gfx.config.max_texture_channel_caches = 2;
1878 adev->gfx.config.max_gprs = 256;
1879 adev->gfx.config.max_gs_threads = 32;
1880 adev->gfx.config.max_hw_contexts = 8;
1882 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1883 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1884 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1885 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1886 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* Smaller APU config (CARRIZO golden address config): 3 CUs,
 * 16 GS threads. */
1889 adev->gfx.config.max_shader_engines = 1;
1890 adev->gfx.config.max_tile_pipes = 2;
1891 adev->gfx.config.max_sh_per_se = 1;
1892 adev->gfx.config.max_backends_per_se = 1;
1893 adev->gfx.config.max_cu_per_sh = 3;
1894 adev->gfx.config.max_texture_channel_caches = 2;
1895 adev->gfx.config.max_gprs = 256;
1896 adev->gfx.config.max_gs_threads = 16;
1897 adev->gfx.config.max_hw_contexts = 8;
1899 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1900 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1901 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1902 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1903 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* 2-SE/4-pipe fallback config, TONGA golden address config. */
1906 adev->gfx.config.max_shader_engines = 2;
1907 adev->gfx.config.max_tile_pipes = 4;
1908 adev->gfx.config.max_cu_per_sh = 2;
1909 adev->gfx.config.max_sh_per_se = 1;
1910 adev->gfx.config.max_backends_per_se = 2;
1911 adev->gfx.config.max_texture_channel_caches = 4;
1912 adev->gfx.config.max_gprs = 256;
1913 adev->gfx.config.max_gs_threads = 32;
1914 adev->gfx.config.max_hw_contexts = 8;
1916 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1917 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1918 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1919 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1920 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* Snapshot memory-controller config; used to derive row size below. */
1924 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1925 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1926 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1928 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1929 adev->gfx.config.mem_max_burst_length_bytes = 256;
1930 if (adev->flags & AMD_IS_APU) {
1931 /* Get memory bank mapping mode. */
1932 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1933 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1934 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1936 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1937 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1938 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1940 /* Validate settings in case only one DIMM installed. */
1941 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1942 dimm00_addr_map = 0;
1943 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1944 dimm01_addr_map = 0;
1945 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1946 dimm10_addr_map = 0;
1947 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1948 dimm11_addr_map = 0;
1950 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1951 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1952 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1953 adev->gfx.config.mem_row_size_in_kb = 2;
1955 adev->gfx.config.mem_row_size_in_kb = 1;
/* Non-APU path: derive row size from MC_ARB_RAMCFG.NOOFCOLS and
 * clamp it to 4KB. */
1957 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1958 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1959 if (adev->gfx.config.mem_row_size_in_kb > 4)
1960 adev->gfx.config.mem_row_size_in_kb = 4;
1963 adev->gfx.config.shader_engine_tile_size = 32;
1964 adev->gfx.config.num_gpus = 1;
1965 adev->gfx.config.multi_gpu_tile_size = 64;
1967 /* fix up row size */
/* Encode mem_row_size_in_kb into GB_ADDR_CONFIG.ROW_SIZE; the three
 * branches write field values 0, 1 and 2 respectively. */
1968 switch (adev->gfx.config.mem_row_size_in_kb) {
1971 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1974 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1977 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1980 adev->gfx.config.gb_addr_config = gb_addr_config;
/*
 * gfx_v8_0_compute_ring_init - set up one GFX8 compute (MEC) ring.
 *
 * Positions the ring identified by @ring_id on the given @mec/@pipe/@queue,
 * assigns it a doorbell slot and a GFX8_MEC_HPD_SIZE slice of the shared
 * EOP buffer, then registers it with the common ring layer hooked to the
 * matching MEC EOP interrupt source.  Returns the amdgpu_ring_init() result.
 */
1985 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1986 int mec, int pipe, int queue)
/* Fix: the pointer was previously reassigned the identical value right
 * after this initialization; the dead reassignment has been removed. */
1990 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1997 ring->queue = queue;
1999 ring->ring_obj = NULL;
2000 ring->use_doorbell = true;
/* One doorbell and one EOP-buffer slice per compute ring. */
2001 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
2002 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2003 + (ring_id * GFX8_MEC_HPD_SIZE);
2004 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
/* Map (me, pipe) onto the linear MEC EOP interrupt source numbering;
 * mec0 is me1, hence the (ring->me - 1) term. */
2006 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2007 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2010 /* type-2 packets are deprecated on MEC, use type-3 instead */
2011 r = amdgpu_ring_init(adev, ring, 1024,
2012 &adev->gfx.eop_irq, irq_type);
2020 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX8 IP block.
 *
 * Registers interrupt sources, loads microcode, allocates the RLC and
 * MEC buffer objects, creates the gfx and compute rings plus the KIQ
 * and its MQDs, reserves GDS/GWS/OA resources, and finishes with
 * gfx_v8_0_gpu_early_init().
 */
2022 static int gfx_v8_0_sw_init(void *handle)
2024 int i, j, k, r, ring_id;
2025 struct amdgpu_ring *ring;
2026 struct amdgpu_kiq *kiq;
2027 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* The MEC count is per-ASIC; pipe/queue counts per MEC are fixed. */
2029 switch (adev->asic_type) {
2033 case CHIP_POLARIS10:
2034 case CHIP_POLARIS11:
2035 case CHIP_POLARIS12:
2037 adev->gfx.mec.num_mec = 2;
2042 adev->gfx.mec.num_mec = 1;
2046 adev->gfx.mec.num_pipe_per_mec = 4;
2047 adev->gfx.mec.num_queue_per_pipe = 8;
/* KIQ event (legacy IH client, source id 178). */
2050 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
/* EOP event (source id 181). */
2055 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
2059 /* Privileged reg */
2060 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2061 &adev->gfx.priv_reg_irq);
2065 /* Privileged inst */
2066 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2067 &adev->gfx.priv_inst_irq);
2071 /* Add CP EDC/ECC irq */
2072 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 197,
2073 &adev->gfx.cp_ecc_error_irq);
2077 /* SQ interrupts. */
2078 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 239,
2081 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
/* SQ interrupt handling is deferred to a workqueue item. */
2085 INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2087 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2089 gfx_v8_0_scratch_init(adev);
2091 r = gfx_v8_0_init_microcode(adev);
2093 DRM_ERROR("Failed to load gfx firmware!\n");
2097 r = gfx_v8_0_rlc_init(adev);
2099 DRM_ERROR("Failed to init rlc BOs!\n");
2103 r = gfx_v8_0_mec_init(adev);
2105 DRM_ERROR("Failed to init MEC BOs!\n");
2109 /* set up the gfx ring */
2110 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2111 ring = &adev->gfx.gfx_ring[i];
2112 ring->ring_obj = NULL;
2113 sprintf(ring->name, "gfx");
2114 /* no gfx doorbells on iceland */
2115 if (adev->asic_type != CHIP_TOPAZ) {
2116 ring->use_doorbell = true;
2117 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2120 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2121 AMDGPU_CP_IRQ_GFX_EOP);
2127 /* set up the compute queues - allocate horizontally across pipes */
/* Loop order mec -> queue -> pipe spreads rings across pipes first. */
2129 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2130 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2131 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2132 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2135 r = gfx_v8_0_compute_ring_init(adev,
/* KIQ uses the same per-ring EOP buffer size as the compute rings. */
2146 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2148 DRM_ERROR("Failed to init KIQ BOs!\n");
2152 kiq = &adev->gfx.kiq;
2153 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2157 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2158 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2162 /* reserve GDS, GWS and OA resource for gfx */
2163 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2164 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2165 &adev->gds.gds_gfx_bo, NULL, NULL);
2169 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2170 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2171 &adev->gds.gws_gfx_bo, NULL, NULL);
2175 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2176 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2177 &adev->gds.oa_gfx_bo, NULL, NULL);
/* 0x8000 bytes (32KB) of constant engine RAM. */
2181 adev->gfx.ce_ram_size = 0x8000;
2183 r = gfx_v8_0_gpu_early_init(adev);
/*
 * gfx_v8_0_sw_fini - tear down everything gfx_v8_0_sw_init() created,
 * roughly in reverse order: GDS/GWS/OA BOs, rings, MQDs, KIQ, MEC and
 * RLC buffers, the clear-state object, and finally the microcode.
 */
2190 static int gfx_v8_0_sw_fini(void *handle)
2193 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2195 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2196 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2197 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2199 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2200 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2201 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2202 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2204 amdgpu_gfx_compute_mqd_sw_fini(adev);
2205 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2206 amdgpu_gfx_kiq_fini(adev);
2208 gfx_v8_0_mec_fini(adev);
2209 gfx_v8_0_rlc_fini(adev);
2210 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2211 &adev->gfx.rlc.clear_state_gpu_addr,
2212 (void **)&adev->gfx.rlc.cs_ptr);
/* Only Carrizo/Stoney allocate a CP jump table; free it here. */
2213 if ((adev->asic_type == CHIP_CARRIZO) ||
2214 (adev->asic_type == CHIP_STONEY)) {
2215 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2216 &adev->gfx.rlc.cp_table_gpu_addr,
2217 (void **)&adev->gfx.rlc.cp_table_ptr);
2219 gfx_v8_0_free_microcode(adev);
2224 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2226 uint32_t *modearray, *mod2array;
2227 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2228 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2231 modearray = adev->gfx.config.tile_mode_array;
2232 mod2array = adev->gfx.config.macrotile_mode_array;
2234 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2235 modearray[reg_offset] = 0;
2237 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2238 mod2array[reg_offset] = 0;
2240 switch (adev->asic_type) {
2242 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243 PIPE_CONFIG(ADDR_SURF_P2) |
2244 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2245 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2246 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2247 PIPE_CONFIG(ADDR_SURF_P2) |
2248 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2249 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2251 PIPE_CONFIG(ADDR_SURF_P2) |
2252 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2253 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2254 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2255 PIPE_CONFIG(ADDR_SURF_P2) |
2256 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2257 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2258 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259 PIPE_CONFIG(ADDR_SURF_P2) |
2260 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2261 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2262 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2263 PIPE_CONFIG(ADDR_SURF_P2) |
2264 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2265 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2266 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267 PIPE_CONFIG(ADDR_SURF_P2) |
2268 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2269 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2270 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2271 PIPE_CONFIG(ADDR_SURF_P2));
2272 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2273 PIPE_CONFIG(ADDR_SURF_P2) |
2274 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2277 PIPE_CONFIG(ADDR_SURF_P2) |
2278 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2279 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2280 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2281 PIPE_CONFIG(ADDR_SURF_P2) |
2282 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2284 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2285 PIPE_CONFIG(ADDR_SURF_P2) |
2286 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2288 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2289 PIPE_CONFIG(ADDR_SURF_P2) |
2290 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2291 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2292 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2293 PIPE_CONFIG(ADDR_SURF_P2) |
2294 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2296 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2297 PIPE_CONFIG(ADDR_SURF_P2) |
2298 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2299 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2300 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2301 PIPE_CONFIG(ADDR_SURF_P2) |
2302 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2303 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2304 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2305 PIPE_CONFIG(ADDR_SURF_P2) |
2306 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2308 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2309 PIPE_CONFIG(ADDR_SURF_P2) |
2310 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2311 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2312 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2313 PIPE_CONFIG(ADDR_SURF_P2) |
2314 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2315 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2316 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2317 PIPE_CONFIG(ADDR_SURF_P2) |
2318 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2320 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2321 PIPE_CONFIG(ADDR_SURF_P2) |
2322 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2323 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2324 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2325 PIPE_CONFIG(ADDR_SURF_P2) |
2326 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2327 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2328 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2329 PIPE_CONFIG(ADDR_SURF_P2) |
2330 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2332 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2333 PIPE_CONFIG(ADDR_SURF_P2) |
2334 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2335 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2336 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 PIPE_CONFIG(ADDR_SURF_P2) |
2338 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2339 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2340 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2341 PIPE_CONFIG(ADDR_SURF_P2) |
2342 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2345 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2348 NUM_BANKS(ADDR_SURF_8_BANK));
2349 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352 NUM_BANKS(ADDR_SURF_8_BANK));
2353 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2356 NUM_BANKS(ADDR_SURF_8_BANK));
2357 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2360 NUM_BANKS(ADDR_SURF_8_BANK));
2361 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364 NUM_BANKS(ADDR_SURF_8_BANK));
2365 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2366 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2367 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2368 NUM_BANKS(ADDR_SURF_8_BANK));
2369 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2370 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2371 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2372 NUM_BANKS(ADDR_SURF_8_BANK));
2373 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2374 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2375 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2376 NUM_BANKS(ADDR_SURF_16_BANK));
2377 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2378 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2379 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2380 NUM_BANKS(ADDR_SURF_16_BANK));
2381 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2382 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2383 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2384 NUM_BANKS(ADDR_SURF_16_BANK));
2385 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2386 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2387 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388 NUM_BANKS(ADDR_SURF_16_BANK));
2389 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2390 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2391 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2392 NUM_BANKS(ADDR_SURF_16_BANK));
2393 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2394 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2395 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2396 NUM_BANKS(ADDR_SURF_16_BANK));
2397 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2399 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400 NUM_BANKS(ADDR_SURF_8_BANK));
2402 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2403 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2405 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2407 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2408 if (reg_offset != 7)
2409 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2414 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2415 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2417 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2418 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2419 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2421 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2422 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2423 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2425 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2426 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2427 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2429 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2430 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2433 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2434 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2435 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2437 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2438 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2440 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2441 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2442 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2444 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2446 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2447 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2448 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2451 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2452 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2453 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2455 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2456 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2457 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2459 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2460 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2461 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2462 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2463 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2464 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2465 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2470 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2473 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2474 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2476 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2477 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2478 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2480 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2481 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2482 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2484 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2485 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2488 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2489 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2492 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2493 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2494 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2496 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2497 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2498 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2500 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2501 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2502 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2503 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2504 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2505 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2506 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2508 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2509 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2510 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2511 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2512 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2513 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2514 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2515 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2516 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2517 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2518 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2519 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2520 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2522 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2523 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2526 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2527 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2528 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2529 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2530 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2531 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2532 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2534 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2537 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2539 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2540 NUM_BANKS(ADDR_SURF_8_BANK));
2541 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2543 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2544 NUM_BANKS(ADDR_SURF_8_BANK));
2545 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548 NUM_BANKS(ADDR_SURF_8_BANK));
2549 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2551 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2552 NUM_BANKS(ADDR_SURF_8_BANK));
2553 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2555 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2556 NUM_BANKS(ADDR_SURF_8_BANK));
2557 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560 NUM_BANKS(ADDR_SURF_8_BANK));
2561 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2563 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2564 NUM_BANKS(ADDR_SURF_8_BANK));
2565 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2567 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2568 NUM_BANKS(ADDR_SURF_8_BANK));
2569 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2571 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2572 NUM_BANKS(ADDR_SURF_8_BANK));
2573 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2575 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2576 NUM_BANKS(ADDR_SURF_8_BANK));
2577 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2579 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2580 NUM_BANKS(ADDR_SURF_8_BANK));
2581 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2583 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2584 NUM_BANKS(ADDR_SURF_8_BANK));
2585 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2587 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2588 NUM_BANKS(ADDR_SURF_8_BANK));
2589 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2591 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2592 NUM_BANKS(ADDR_SURF_4_BANK));
2594 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2595 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2597 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2598 if (reg_offset != 7)
2599 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2603 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2605 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2606 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2607 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2608 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2610 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2611 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2614 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2615 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2617 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2618 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2619 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2622 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2623 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2626 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2627 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2628 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2630 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2631 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2633 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2634 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2635 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2636 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2637 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2638 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2639 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2640 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2641 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2644 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2645 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2646 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2648 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2649 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2650 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2652 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2653 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2654 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2656 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2659 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2660 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2662 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2663 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2664 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2665 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2666 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2667 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2669 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2670 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2672 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2673 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2674 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2675 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2676 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2677 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2678 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2680 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2681 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2682 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2683 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2684 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2685 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2686 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2687 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2688 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2689 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2690 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2692 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2693 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2694 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2696 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2697 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2698 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2699 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2700 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2701 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2702 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2703 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2704 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2705 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2706 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2707 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2708 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2709 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2710 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2711 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2713 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2715 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2717 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2718 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2719 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2720 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2721 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2722 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2726 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729 NUM_BANKS(ADDR_SURF_16_BANK));
2730 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2732 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2733 NUM_BANKS(ADDR_SURF_16_BANK));
2734 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2736 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2737 NUM_BANKS(ADDR_SURF_16_BANK));
2738 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2741 NUM_BANKS(ADDR_SURF_16_BANK));
2742 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2744 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2745 NUM_BANKS(ADDR_SURF_16_BANK));
2746 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2748 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2749 NUM_BANKS(ADDR_SURF_16_BANK));
2750 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2752 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2753 NUM_BANKS(ADDR_SURF_16_BANK));
2754 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2755 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2756 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2757 NUM_BANKS(ADDR_SURF_16_BANK));
2758 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2760 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2761 NUM_BANKS(ADDR_SURF_16_BANK));
2762 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2764 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765 NUM_BANKS(ADDR_SURF_16_BANK));
2766 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2768 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2769 NUM_BANKS(ADDR_SURF_16_BANK));
2770 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2772 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2773 NUM_BANKS(ADDR_SURF_8_BANK));
2774 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2776 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2777 NUM_BANKS(ADDR_SURF_4_BANK));
2778 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2779 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2780 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2781 NUM_BANKS(ADDR_SURF_4_BANK));
2783 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2784 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2786 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2787 if (reg_offset != 7)
2788 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2791 case CHIP_POLARIS11:
2792 case CHIP_POLARIS12:
2793 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2794 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2796 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2797 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2800 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2801 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2802 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2803 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2804 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2805 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2806 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2808 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2809 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2810 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2812 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2813 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2814 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2816 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2817 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2818 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2820 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2821 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2822 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2824 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2825 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2826 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2827 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2828 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2829 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2830 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2831 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2832 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2834 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2836 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2837 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2838 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2839 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2840 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2841 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2842 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2843 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2844 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2846 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2848 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2852 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2856 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2859 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2860 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2863 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2864 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2866 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2867 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2868 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2869 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2870 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2871 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2872 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2875 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2876 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2877 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2879 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2880 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2881 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2882 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2883 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2884 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2885 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2886 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2887 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2888 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2890 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2891 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2892 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2893 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2894 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2895 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2896 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2898 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2899 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2900 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2902 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2903 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2905 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2907 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2908 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2909 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2911 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2914 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2916 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2917 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2918 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2919 NUM_BANKS(ADDR_SURF_16_BANK));
2921 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2922 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2923 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2924 NUM_BANKS(ADDR_SURF_16_BANK));
2926 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2928 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929 NUM_BANKS(ADDR_SURF_16_BANK));
2931 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2932 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2933 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2934 NUM_BANKS(ADDR_SURF_16_BANK));
2936 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2937 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2938 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2939 NUM_BANKS(ADDR_SURF_16_BANK));
2941 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2942 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2943 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2944 NUM_BANKS(ADDR_SURF_16_BANK));
2946 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2947 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2948 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2949 NUM_BANKS(ADDR_SURF_16_BANK));
2951 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2952 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2953 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954 NUM_BANKS(ADDR_SURF_16_BANK));
2956 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2957 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959 NUM_BANKS(ADDR_SURF_16_BANK));
2961 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2962 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2963 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2964 NUM_BANKS(ADDR_SURF_16_BANK));
2966 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2967 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2968 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2969 NUM_BANKS(ADDR_SURF_16_BANK));
2971 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2974 NUM_BANKS(ADDR_SURF_16_BANK));
2976 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2979 NUM_BANKS(ADDR_SURF_8_BANK));
2981 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2982 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2983 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2984 NUM_BANKS(ADDR_SURF_4_BANK));
2986 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2987 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2989 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2990 if (reg_offset != 7)
2991 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2994 case CHIP_POLARIS10:
2995 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2996 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2997 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2998 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2999 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3000 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3002 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3003 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3004 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3005 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3006 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3007 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3009 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3010 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3011 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3014 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3015 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3016 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3018 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3019 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3020 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3021 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3022 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3023 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3024 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3025 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3026 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3027 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3028 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3029 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3030 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3031 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3032 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3033 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3034 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3035 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3036 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3037 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3038 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3039 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3040 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3041 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3042 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3043 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3044 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3045 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3046 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3047 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3049 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3050 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3051 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3052 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3053 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3054 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3055 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3058 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3059 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3061 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3062 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3063 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3065 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3066 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3068 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3069 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3070 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3073 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3074 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3075 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3077 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3078 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3079 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3081 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3082 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3083 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3085 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3086 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3087 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3089 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3090 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3091 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3093 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3094 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3095 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3096 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3097 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3098 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3099 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3100 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3101 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3102 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3103 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3105 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3107 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3108 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3109 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3110 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3111 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3112 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3113 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3114 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3115 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3118 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3119 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3120 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3121 NUM_BANKS(ADDR_SURF_16_BANK));
3123 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3124 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3125 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3126 NUM_BANKS(ADDR_SURF_16_BANK));
3128 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3130 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3131 NUM_BANKS(ADDR_SURF_16_BANK));
3133 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3134 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3135 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3136 NUM_BANKS(ADDR_SURF_16_BANK));
3138 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3139 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3140 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3141 NUM_BANKS(ADDR_SURF_16_BANK));
3143 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3146 NUM_BANKS(ADDR_SURF_16_BANK));
3148 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3149 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3150 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3151 NUM_BANKS(ADDR_SURF_16_BANK));
3153 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3155 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3156 NUM_BANKS(ADDR_SURF_16_BANK));
3158 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3159 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3160 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3161 NUM_BANKS(ADDR_SURF_16_BANK));
3163 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3164 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3165 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3166 NUM_BANKS(ADDR_SURF_16_BANK));
3168 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3169 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3170 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3171 NUM_BANKS(ADDR_SURF_16_BANK));
3173 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3174 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3175 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3176 NUM_BANKS(ADDR_SURF_8_BANK));
3178 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3179 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3180 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3181 NUM_BANKS(ADDR_SURF_4_BANK));
3183 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3185 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3186 NUM_BANKS(ADDR_SURF_4_BANK));
3188 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3189 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3191 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3192 if (reg_offset != 7)
3193 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3197 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3198 PIPE_CONFIG(ADDR_SURF_P2) |
3199 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3200 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3201 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3202 PIPE_CONFIG(ADDR_SURF_P2) |
3203 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3204 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3205 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3206 PIPE_CONFIG(ADDR_SURF_P2) |
3207 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3208 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3209 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3210 PIPE_CONFIG(ADDR_SURF_P2) |
3211 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3212 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3213 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3214 PIPE_CONFIG(ADDR_SURF_P2) |
3215 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3216 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3217 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3218 PIPE_CONFIG(ADDR_SURF_P2) |
3219 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3220 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3221 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3222 PIPE_CONFIG(ADDR_SURF_P2) |
3223 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3224 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3225 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3226 PIPE_CONFIG(ADDR_SURF_P2));
3227 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3228 PIPE_CONFIG(ADDR_SURF_P2) |
3229 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3231 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232 PIPE_CONFIG(ADDR_SURF_P2) |
3233 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3234 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3236 PIPE_CONFIG(ADDR_SURF_P2) |
3237 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3238 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3239 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3240 PIPE_CONFIG(ADDR_SURF_P2) |
3241 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3243 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3244 PIPE_CONFIG(ADDR_SURF_P2) |
3245 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3248 PIPE_CONFIG(ADDR_SURF_P2) |
3249 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3251 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3252 PIPE_CONFIG(ADDR_SURF_P2) |
3253 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3255 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3256 PIPE_CONFIG(ADDR_SURF_P2) |
3257 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3259 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3260 PIPE_CONFIG(ADDR_SURF_P2) |
3261 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3263 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3264 PIPE_CONFIG(ADDR_SURF_P2) |
3265 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3267 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3268 PIPE_CONFIG(ADDR_SURF_P2) |
3269 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3271 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3272 PIPE_CONFIG(ADDR_SURF_P2) |
3273 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3275 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3276 PIPE_CONFIG(ADDR_SURF_P2) |
3277 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3279 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3280 PIPE_CONFIG(ADDR_SURF_P2) |
3281 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3283 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3284 PIPE_CONFIG(ADDR_SURF_P2) |
3285 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3287 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3288 PIPE_CONFIG(ADDR_SURF_P2) |
3289 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3291 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292 PIPE_CONFIG(ADDR_SURF_P2) |
3293 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3295 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3296 PIPE_CONFIG(ADDR_SURF_P2) |
3297 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3300 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3301 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3302 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303 NUM_BANKS(ADDR_SURF_8_BANK));
3304 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3305 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3306 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 NUM_BANKS(ADDR_SURF_8_BANK));
3308 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3309 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3310 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3311 NUM_BANKS(ADDR_SURF_8_BANK));
3312 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3314 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3315 NUM_BANKS(ADDR_SURF_8_BANK));
3316 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3318 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3319 NUM_BANKS(ADDR_SURF_8_BANK));
3320 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3321 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3322 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3323 NUM_BANKS(ADDR_SURF_8_BANK));
3324 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327 NUM_BANKS(ADDR_SURF_8_BANK));
3328 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3329 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3330 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331 NUM_BANKS(ADDR_SURF_16_BANK));
3332 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3333 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3334 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3335 NUM_BANKS(ADDR_SURF_16_BANK));
3336 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3337 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3338 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3339 NUM_BANKS(ADDR_SURF_16_BANK));
3340 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3341 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3342 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3343 NUM_BANKS(ADDR_SURF_16_BANK));
3344 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3345 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3346 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3347 NUM_BANKS(ADDR_SURF_16_BANK));
3348 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3349 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3350 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3351 NUM_BANKS(ADDR_SURF_16_BANK));
3352 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3353 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3354 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3355 NUM_BANKS(ADDR_SURF_8_BANK));
3357 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3358 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3360 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3362 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3363 if (reg_offset != 7)
3364 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3369 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3373 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3374 PIPE_CONFIG(ADDR_SURF_P2) |
3375 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3376 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3377 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3378 PIPE_CONFIG(ADDR_SURF_P2) |
3379 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3380 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3381 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3382 PIPE_CONFIG(ADDR_SURF_P2) |
3383 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3384 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3385 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3386 PIPE_CONFIG(ADDR_SURF_P2) |
3387 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3388 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3389 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3390 PIPE_CONFIG(ADDR_SURF_P2) |
3391 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3392 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3393 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3394 PIPE_CONFIG(ADDR_SURF_P2) |
3395 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3396 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3397 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3398 PIPE_CONFIG(ADDR_SURF_P2) |
3399 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3400 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3401 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3402 PIPE_CONFIG(ADDR_SURF_P2));
3403 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3404 PIPE_CONFIG(ADDR_SURF_P2) |
3405 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3406 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3407 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3408 PIPE_CONFIG(ADDR_SURF_P2) |
3409 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3410 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3411 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3412 PIPE_CONFIG(ADDR_SURF_P2) |
3413 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3415 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3416 PIPE_CONFIG(ADDR_SURF_P2) |
3417 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3418 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3419 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3420 PIPE_CONFIG(ADDR_SURF_P2) |
3421 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3423 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3424 PIPE_CONFIG(ADDR_SURF_P2) |
3425 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3427 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3428 PIPE_CONFIG(ADDR_SURF_P2) |
3429 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3431 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3432 PIPE_CONFIG(ADDR_SURF_P2) |
3433 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3435 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3436 PIPE_CONFIG(ADDR_SURF_P2) |
3437 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3439 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3440 PIPE_CONFIG(ADDR_SURF_P2) |
3441 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3443 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3444 PIPE_CONFIG(ADDR_SURF_P2) |
3445 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3447 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3448 PIPE_CONFIG(ADDR_SURF_P2) |
3449 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3450 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3451 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3452 PIPE_CONFIG(ADDR_SURF_P2) |
3453 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3455 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3456 PIPE_CONFIG(ADDR_SURF_P2) |
3457 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3459 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3460 PIPE_CONFIG(ADDR_SURF_P2) |
3461 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3463 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3464 PIPE_CONFIG(ADDR_SURF_P2) |
3465 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3467 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3468 PIPE_CONFIG(ADDR_SURF_P2) |
3469 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3470 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3471 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3472 PIPE_CONFIG(ADDR_SURF_P2) |
3473 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3476 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3479 NUM_BANKS(ADDR_SURF_8_BANK));
3480 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3481 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3482 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3483 NUM_BANKS(ADDR_SURF_8_BANK));
3484 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3487 NUM_BANKS(ADDR_SURF_8_BANK));
3488 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3489 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3490 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3491 NUM_BANKS(ADDR_SURF_8_BANK));
3492 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3493 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3494 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3495 NUM_BANKS(ADDR_SURF_8_BANK));
3496 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3499 NUM_BANKS(ADDR_SURF_8_BANK));
3500 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3501 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3502 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3503 NUM_BANKS(ADDR_SURF_8_BANK));
3504 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3505 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3506 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3507 NUM_BANKS(ADDR_SURF_16_BANK));
3508 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3511 NUM_BANKS(ADDR_SURF_16_BANK));
3512 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3513 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3514 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3515 NUM_BANKS(ADDR_SURF_16_BANK));
3516 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3517 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3518 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3519 NUM_BANKS(ADDR_SURF_16_BANK));
3520 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3523 NUM_BANKS(ADDR_SURF_16_BANK));
3524 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3525 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3526 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3527 NUM_BANKS(ADDR_SURF_16_BANK));
3528 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3531 NUM_BANKS(ADDR_SURF_8_BANK));
3533 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3534 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3536 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3538 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3539 if (reg_offset != 7)
3540 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
/*
 * Steer indexed register access by programming GRBM_GFX_INDEX so that
 * subsequent reads/writes target the given shader engine (se_num),
 * shader array (sh_num) and instance.  A value of 0xffffffff at any
 * level requests broadcast to all units of that level.
 * NOTE(review): callers appear to serialize this via grbm_idx_mutex —
 * confirm against the full file.
 */
3546 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3547 u32 se_num, u32 sh_num, u32 instance)
3551 if (instance == 0xffffffff)
/* broadcast writes to every instance */
3552 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3554 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3556 if (se_num == 0xffffffff)
3557 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3559 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3561 if (sh_num == 0xffffffff)
3562 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3564 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
/* commit the selection in one register write */
3566 WREG32(mmGRBM_GFX_INDEX, data);
/*
 * Select a micro-engine / pipe / queue for subsequent per-queue register
 * access.  Delegates to vi_srbm_select() with vmid fixed to 0.
 */
3569 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3570 u32 me, u32 pipe, u32 q)
3572 vi_srbm_select(adev, me, pipe, q, 0);
/*
 * Return a bitmap of the render backends (RBs) that are active for the
 * currently selected SE/SH (see gfx_v8_0_select_se_sh).  Disabled RBs
 * are reported by CC_RB_BACKEND_DISABLE (fuses) and
 * GC_USER_RB_BACKEND_DISABLE (user override); the union is inverted and
 * masked down to the per-SH backend count.
 */
3575 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
/* OR the two disable sources together before extracting the field */
3579 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3580 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3582 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3584 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3585 adev->gfx.config.max_sh_per_se);
/* active = not-disabled, limited to the valid RB bits */
3587 return (~data) & mask;
/*
 * Fill in the golden PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 values
 * (rconf / rconf1) for the current ASIC.  The SE/PKR/RB mapping fields
 * are per-chip constants; unknown ASICs are reported with DRM_ERROR.
 * NOTE(review): several case labels are not visible in this view of the
 * file — the per-chip grouping below is partial.
 */
3591 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3593 switch (adev->asic_type) {
3596 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3597 RB_XSEL2(1) | PKR_MAP(2) |
3598 PKR_XSEL(1) | PKR_YSEL(1) |
3599 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3600 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3604 case CHIP_POLARIS10:
3605 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3606 SE_XSEL(1) | SE_YSEL(1);
3607 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3612 *rconf |= RB_MAP_PKR0(2);
3615 case CHIP_POLARIS11:
3616 case CHIP_POLARIS12:
3617 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3618 SE_XSEL(1) | SE_YSEL(1);
/* unrecognized ASIC: leave *rconf/*rconf1 untouched and complain */
3626 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
/*
 * Program per-SE raster configuration on parts where some render
 * backends have been harvested (fused off).  For each shader engine the
 * golden raster_config is patched so that SE/PKR/RB map fields route
 * work only to RBs present in rb_mask, then written through a
 * per-SE GRBM_GFX_INDEX selection.  raster_config_1 (SE_PAIR fields)
 * is adjusted once when a whole SE pair is missing.
 */
3632 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3633 u32 raster_config, u32 raster_config_1,
3634 unsigned rb_mask, unsigned num_rb)
/* clamp topology inputs so the divisions below are well-defined */
3636 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3637 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3638 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3639 unsigned rb_per_se = num_rb / num_se;
3640 unsigned se_mask[4];
/* slice rb_mask into one contiguous sub-mask per shader engine */
3643 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3644 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3645 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3646 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
/* only these topologies are expected on GFX8 parts */
3648 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3649 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3650 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
/* an entire SE pair is harvested: repoint SE_PAIR_MAP at the live pair */
3652 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3653 (!se_mask[2] && !se_mask[3]))) {
3654 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3656 if (!se_mask[0] && !se_mask[1]) {
3658 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3661 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3665 for (se = 0; se < num_se; se++) {
3666 unsigned raster_config_se = raster_config;
3667 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3668 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
/* idx addresses the SE pair this engine belongs to */
3669 int idx = (se / 2) * 2;
3671 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3672 raster_config_se &= ~SE_MAP_MASK;
3674 if (!se_mask[idx]) {
3675 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3677 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
/* one of the two packers in this SE has no live RBs: remap PKR */
3681 pkr0_mask &= rb_mask;
3682 pkr1_mask &= rb_mask;
3683 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3684 raster_config_se &= ~PKR_MAP_MASK;
3687 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3689 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
/* within packer 0, remap RB_MAP_PKR0 if one of its two RBs is gone */
3693 if (rb_per_se >= 2) {
3694 unsigned rb0_mask = 1 << (se * rb_per_se);
3695 unsigned rb1_mask = rb0_mask << 1;
3697 rb0_mask &= rb_mask;
3698 rb1_mask &= rb_mask;
3699 if (!rb0_mask || !rb1_mask) {
3700 raster_config_se &= ~RB_MAP_PKR0_MASK;
3704 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3707 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
/* and likewise for packer 1 when it exists */
3711 if (rb_per_se > 2) {
3712 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3713 rb1_mask = rb0_mask << 1;
3714 rb0_mask &= rb_mask;
3715 rb1_mask &= rb_mask;
3716 if (!rb0_mask || !rb1_mask) {
3717 raster_config_se &= ~RB_MAP_PKR1_MASK;
3721 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3724 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3730 /* GRBM_GFX_INDEX has a different offset on VI */
3731 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3732 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3733 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3736 /* GRBM_GFX_INDEX has a different offset on VI */
/* restore broadcast selection before returning */
3737 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
/*
 * Discover which render backends are active across all SE/SH, program
 * the raster configuration accordingly (golden values when nothing is
 * harvested, otherwise patched per-SE values), and cache the resulting
 * registers for userspace queries.  Serialized by grbm_idx_mutex since
 * it relies on GRBM_GFX_INDEX steering.
 */
3740 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3744 u32 raster_config = 0, raster_config_1 = 0;
3746 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3747 adev->gfx.config.max_sh_per_se;
3748 unsigned num_rb_pipes;
3750 mutex_lock(&adev->grbm_idx_mutex);
/* build one global bitmap of active RBs, SH by SH */
3751 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3752 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3753 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3754 data = gfx_v8_0_get_rb_active_bitmap(adev);
3755 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3756 rb_bitmap_width_per_sh);
3759 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3761 adev->gfx.config.backend_enable_mask = active_rbs;
3762 adev->gfx.config.num_rbs = hweight32(active_rbs);
3764 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3765 adev->gfx.config.max_shader_engines, 16);
3767 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
/* no harvesting detected: the golden config can be broadcast as-is */
3769 if (!adev->gfx.config.backend_enable_mask ||
3770 adev->gfx.config.num_rbs >= num_rb_pipes) {
3771 WREG32(mmPA_SC_RASTER_CONFIG, raster_config)
3772 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3774 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3775 adev->gfx.config.backend_enable_mask,
3779 /* cache the values for userspace */
3780 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3781 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3782 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3783 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3784 RREG32(mmCC_RB_BACKEND_DISABLE);
3785 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3786 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3787 adev->gfx.config.rb_config[i][j].raster_config =
3788 RREG32(mmPA_SC_RASTER_CONFIG);
3789 adev->gfx.config.rb_config[i][j].raster_config_1 =
3790 RREG32(mmPA_SC_RASTER_CONFIG_1);
3793 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3794 mutex_unlock(&adev->grbm_idx_mutex);
3798 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
3800 * @adev: amdgpu_device pointer
3802 * Initialize compute vmid sh_mem registers
3805 #define DEFAULT_SH_MEM_BASES (0x6000)
3806 #define FIRST_COMPUTE_VMID (8)
3807 #define LAST_COMPUTE_VMID (16)
/*
 * Program SH_MEM_* registers for the compute VMIDs
 * (FIRST_COMPUTE_VMID..LAST_COMPUTE_VMID-1): HSA64 address mode,
 * unaligned access, cache-coherent default MTYPE, and the LDS/scratch
 * aperture bases.  Selection of each VMID goes through SRBM, so the
 * loop is serialized with srbm_mutex.
 */
3808 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3811 uint32_t sh_mem_config;
3812 uint32_t sh_mem_bases;
3815 * Configure apertures:
3816 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3817 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3818 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
/* shared base in the low half, private base in the high half */
3820 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3822 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3823 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3824 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3825 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3826 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3827 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3829 mutex_lock(&adev->srbm_mutex);
3830 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3831 vi_srbm_select(adev, 0, 0, 0, i);
3832 /* CP and shaders */
3833 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
/* APE1 base > limit disables the APE1 aperture for these VMIDs */
3834 WREG32(mmSH_MEM_APE1_BASE, 1);
3835 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3836 WREG32(mmSH_MEM_BASES, sh_mem_bases);
/* back to VMID 0 before releasing the SRBM lock */
3838 vi_srbm_select(adev, 0, 0, 0, 0);
3839 mutex_unlock(&adev->srbm_mutex);
/*
 * Set per-ASIC gfx config defaults.  Only double_offchip_lds_buf is set
 * here; the switch's case labels are not visible in this view of the
 * file, so the exact chip split cannot be documented from here.
 */
3842 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3844 switch (adev->asic_type) {
3846 adev->gfx.config.double_offchip_lds_buf = 1;
3850 adev->gfx.config.double_offchip_lds_buf = 0;
/*
 * One-time GFX block bring-up: address config, tiling tables, RB setup,
 * CU info, per-VMID SH_MEM programming, compute-VMID apertures, and
 * broadcast PA_SC/SPI defaults.  Called during hw init.
 */
3855 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3857 u32 tmp, sh_static_mem_cfg;
3860 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
/* GB_ADDR_CONFIG must be mirrored into HDP and DMIF */
3861 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3862 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3863 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3865 gfx_v8_0_tiling_mode_table_init(adev);
3866 gfx_v8_0_setup_rb(adev);
3867 gfx_v8_0_get_cu_info(adev);
3868 gfx_v8_0_config_init(adev);
3870 /* XXX SH_MEM regs */
3871 /* where to put LDS, scratch, GPUVM in FSA64 space */
3872 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3874 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3876 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3878 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3880 mutex_lock(&adev->srbm_mutex);
/* program SH_MEM_CONFIG/BASES for every VMID via SRBM selection */
3881 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3882 vi_srbm_select(adev, 0, 0, 0, i);
3883 /* CP and shaders */
3885 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3886 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3887 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3888 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3889 WREG32(mmSH_MEM_CONFIG, tmp);
3890 WREG32(mmSH_MEM_BASES, 0);
/* NOTE(review): this second branch (elided condition in this view)
 * uses MTYPE_NC and the shared aperture base instead */
3892 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3893 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3894 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3895 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3896 WREG32(mmSH_MEM_CONFIG, tmp);
3897 tmp = adev->gmc.shared_aperture_start >> 48;
3898 WREG32(mmSH_MEM_BASES, tmp);
/* base > limit disables the APE1 aperture */
3901 WREG32(mmSH_MEM_APE1_BASE, 1);
3902 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3904 vi_srbm_select(adev, 0, 0, 0, 0);
3905 mutex_unlock(&adev->srbm_mutex);
3907 gfx_v8_0_init_compute_vmid(adev);
3909 mutex_lock(&adev->grbm_idx_mutex);
3911 * making sure that the following register writes will be broadcasted
3912 * to all the shaders
3914 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3916 WREG32(mmPA_SC_FIFO_SIZE,
3917 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3918 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3919 (adev->gfx.config.sc_prim_fifo_size_backend <<
3920 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3921 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3922 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3923 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3924 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
/* give all four pipe-order timestamps equal arbitration priority */
3926 tmp = RREG32(mmSPI_ARB_PRIORITY);
3927 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3928 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3929 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3930 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3931 WREG32(mmSPI_ARB_PRIORITY, tmp);
3933 mutex_unlock(&adev->grbm_idx_mutex);
/*
 * Busy-wait until the RLC serdes masters report idle: first the per-CU
 * masters on every SE/SH (via GRBM_GFX_INDEX steering), then the
 * non-CU masters (SE/GC/TC0/TC1).  Each wait is bounded by
 * adev->usec_timeout; a timeout on the CU masters is logged and the
 * function bails out early.
 */
3937 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3942 mutex_lock(&adev->grbm_idx_mutex);
3943 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3944 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3945 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3946 for (k = 0; k < adev->usec_timeout; k++) {
3947 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
/* k == usec_timeout means the poll above never saw idle */
3951 if (k == adev->usec_timeout) {
3952 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3953 0xffffffff, 0xffffffff);
3954 mutex_unlock(&adev->grbm_idx_mutex);
3955 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3961 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3962 mutex_unlock(&adev->grbm_idx_mutex);
/* now wait for the global (non-CU) serdes masters */
3964 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3965 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3966 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3967 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3968 for (k = 0; k < adev->usec_timeout; k++) {
3969 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/*
 * Enable or disable the GUI-idle related interrupt sources on the CP
 * gfx ring (context busy/empty, CMP busy, GFX idle) with a single
 * read-modify-write of CP_INT_CNTL_RING0.
 */
3975 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3978 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3980 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3981 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3982 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3983 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3985 WREG32(mmCP_INT_CNTL_RING0, tmp);
/*
 * Point the RLC at the clear-state indirect buffer (CSIB): program the
 * 64-bit GPU address (low bits forced to 4-byte alignment) and length.
 */
3988 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3991 WREG32(mmRLC_CSIB_ADDR_HI,
3992 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3993 WREG32(mmRLC_CSIB_ADDR_LO,
3994 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3995 WREG32(mmRLC_CSIB_LENGTH,
3996 adev->gfx.rlc.clear_state_size);
/*
 * Walk the RLC indirect register-list format, recording the start
 * offset of each entry in ind_start_offsets[] and collecting the set of
 * unique register indices in unique_indices[]; matching entries in
 * register_list_format are rewritten in place to their index into
 * unique_indices.  BUG_ON guards both output arrays against overflow
 * (max_offset / max_indices).  0xFFFFFFFF acts as an entry terminator.
 */
3999 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
4002 int *unique_indices,
4005 int *ind_start_offsets,
4010 bool new_entry = true;
4012 for (; ind_offset < list_size; ind_offset++) {
/* first word of a new entry: remember where it starts */
4016 ind_start_offsets[*offset_count] = ind_offset;
4017 *offset_count = *offset_count + 1;
4018 BUG_ON(*offset_count >= max_offset);
4021 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
4028 /* look for the matching indice */
4030 indices < *indices_count;
4032 if (unique_indices[indices] ==
4033 register_list_format[ind_offset])
/* not seen before: append to the unique set */
4037 if (indices >= *indices_count) {
4038 unique_indices[*indices_count] =
4039 register_list_format[ind_offset];
4040 indices = *indices_count;
4041 *indices_count = *indices_count + 1;
4042 BUG_ON(*indices_count >= max_indices);
/* replace the raw value with its position in unique_indices */
4045 register_list_format[ind_offset] = indices;
/*
 * Upload the RLC save/restore register lists: copy the firmware-provided
 * format list (it is modified in place by the parser), write the restore
 * list to SRM ARAM, the format list and start offsets to GPM scratch,
 * and program the unique-index control/data register pairs.
 * Returns 0 on success; allocation failure is the visible error path.
 */
4049 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4052 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4053 int indices_count = 0;
4054 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4055 int offset_count = 0;
/* work on a copy: gfx_v8_0_parse_ind_reg_list rewrites entries in place */
4058 unsigned int *register_list_format =
4059 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4060 if (!register_list_format)
4062 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4063 adev->gfx.rlc.reg_list_format_size_bytes);
4065 gfx_v8_0_parse_ind_reg_list(register_list_format,
4066 RLC_FormatDirectRegListLength,
4067 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4070 ARRAY_SIZE(unique_indices),
4071 indirect_start_offsets,
4073 ARRAY_SIZE(indirect_start_offsets));
4075 /* save and restore list */
4076 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4078 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4079 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4080 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
/* register list format, streamed into GPM scratch */
4083 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4084 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4085 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
/* restore-list length is stored in units of register pairs (dwords/2) */
4087 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4088 list_size = list_size >> 1;
4089 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4090 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4092 /* starting offsets starts */
4093 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4094 adev->gfx.rlc.starting_offsets_start);
4095 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4096 WREG32(mmRLC_GPM_SCRATCH_DATA,
4097 indirect_start_offsets[i]);
4099 /* unique indices */
4100 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4101 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
/* low 18 bits -> ADDR_n register, upper bits -> DATA_n register.
 * NOTE(review): the 0x3FFFF mask vs the >> 20 shift leaves bits 18-19
 * unused — confirm this split against the RLC SRM register spec. */
4102 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4103 if (unique_indices[i] != 0) {
4104 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4105 WREG32(data + i, unique_indices[i] >> 20);
4108 kfree(register_list_format);
/* Turn on the RLC save/restore machine (SRM_ENABLE bit). */
4113 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4115 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
/*
 * Program the RLC power-gating timing parameters: CP WPTR idle poll
 * count, the four RLC_PG_DELAY sub-delays (all 0x10), the serdes
 * command delay and the GRBM-save GFX idle threshold for auto PG.
 */
4118 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4122 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4124 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4125 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4126 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4127 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4128 WREG32(mmRLC_PG_DELAY, data);
4130 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4131 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
/* Toggle SMU clock slow-down during power-up (Carrizo-family PG). */
4135 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4138 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
/* Toggle SMU clock slow-down during power-down (Carrizo-family PG). */
4141 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4144 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
/* Enable/disable CP power gating; note the field is a *disable* bit,
 * hence the inverted enable ? 0 : 1. */
4147 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4149 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
/*
 * Power-gating init, split by ASIC family: APUs (Carrizo/Stoney) also
 * program the RLC jump table and the always-on CU mask; Polaris11/12
 * and VegaM get the common CSB + save/restore + PG-delay setup only.
 * Other ASICs fall through with no PG init here.
 */
4152 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4154 if ((adev->asic_type == CHIP_CARRIZO) ||
4155 (adev->asic_type == CHIP_STONEY)) {
4156 gfx_v8_0_init_csb(adev);
4157 gfx_v8_0_init_save_restore_list(adev);
4158 gfx_v8_0_enable_save_restore_machine(adev);
/* jump table address is stored in 256-byte units, hence >> 8 */
4159 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4160 gfx_v8_0_init_power_gating(adev);
4161 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4162 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4163 (adev->asic_type == CHIP_POLARIS12) ||
4164 (adev->asic_type == CHIP_VEGAM)) {
4165 gfx_v8_0_init_csb(adev);
4166 gfx_v8_0_init_save_restore_list(adev);
4167 gfx_v8_0_enable_save_restore_machine(adev);
4168 gfx_v8_0_init_power_gating(adev);
/*
 * Halt the RLC: clear the F32 enable bit, mask the GUI-idle interrupts,
 * then wait for the serdes masters to drain.
 */
4173 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4175 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4177 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4178 gfx_v8_0_wait_for_rlc_serdes(adev);
/* Pulse the RLC soft reset: assert, (brief delay elided in this view),
 * then deassert SOFT_RESET_RLC in GRBM_SOFT_RESET. */
4181 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4183 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4186 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
/*
 * Start the RLC (set RLC_ENABLE_F32).  On dGPUs the GUI-idle interrupt
 * is enabled here; APUs (Carrizo) enable it later, after CP init.
 */
4190 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4192 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4194 /* carrizo do enable cp interrupt after cp inited */
4195 if (!(adev->flags & AMD_IS_APU))
4196 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/*
 * Legacy (non-SMU) RLC microcode load: validate that the firmware blob
 * is present, then stream the ucode words into RLC_GPM_UCODE_DATA with
 * the address register auto-pointing from 0.  The final write of the fw
 * version to the ADDR register follows the hardware's load protocol.
 */
4201 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4203 const struct rlc_firmware_header_v2_0 *hdr;
4204 const __le32 *fw_data;
4205 unsigned i, fw_size;
/* no firmware loaded -> error return (value elided in this view) */
4207 if (!adev->gfx.rlc_fw)
4210 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4211 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4213 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4214 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4215 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4217 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4218 for (i = 0; i < fw_size; i++)
4219 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4220 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
/*
 * Full RLC bring-up sequence: stop the RLC, disable CGCG/CGLS clock
 * gating (plus the 3D variant on Polaris/VegaM), clear PG control,
 * reset, run PG init, optionally load microcode in direct-load mode,
 * and finally start the RLC.
 */
4225 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4230 gfx_v8_0_rlc_stop(adev);
/* disable coarse-grain clock gating while the RLC is being set up */
4233 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4234 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4235 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4236 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4237 if (adev->asic_type == CHIP_POLARIS11 ||
4238 adev->asic_type == CHIP_POLARIS10 ||
4239 adev->asic_type == CHIP_POLARIS12 ||
4240 adev->asic_type == CHIP_VEGAM) {
4241 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4243 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
/* start from a clean power-gating state */
4247 WREG32(mmRLC_PG_CNTL, 0);
4249 gfx_v8_0_rlc_reset(adev);
4250 gfx_v8_0_init_pg(adev);
4253 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4254 /* legacy rlc firmware loading */
4255 r = gfx_v8_0_rlc_load_microcode(adev);
4260 gfx_v8_0_rlc_start(adev);
/*
 * Enable or halt the three CP gfx micro-engines (ME/PFP/CE) via
 * CP_ME_CNTL.  When halting, every gfx ring is also marked not ready so
 * the scheduler stops submitting to it.
 */
4265 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4268 u32 tmp = RREG32(mmCP_ME_CNTL);
4271 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4272 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4273 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4275 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4276 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4277 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4278 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4279 adev->gfx.gfx_ring[i].ready = false;
4281 WREG32(mmCP_ME_CNTL, tmp);
/*
 * Direct-load the three CP gfx firmwares.  Each is streamed word by
 * word into its ucode data register after zeroing the address register,
 * with the halt asserted first via gfx_v8_0_cp_gfx_enable(false).
 * PFP and CE use *_UCODE_ADDR/DATA, ME uses CP_ME_RAM_WADDR/DATA.
 */
4285 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4287 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4288 const struct gfx_firmware_header_v1_0 *ce_hdr;
4289 const struct gfx_firmware_header_v1_0 *me_hdr;
4290 const __le32 *fw_data;
4291 unsigned i, fw_size;
/* all three firmwares must be present or we bail out */
4293 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4296 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4297 adev->gfx.pfp_fw->data;
4298 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4299 adev->gfx.ce_fw->data;
4300 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4301 adev->gfx.me_fw->data;
4303 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4304 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4305 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
/* halt ME/PFP/CE before touching their ucode RAM */
4307 gfx_v8_0_cp_gfx_enable(adev, false);
/* PFP */
4310 fw_data = (const __le32 *)
4311 (adev->gfx.pfp_fw->data +
4312 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4313 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4314 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4315 for (i = 0; i < fw_size; i++)
4316 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4317 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
/* CE */
4320 fw_data = (const __le32 *)
4321 (adev->gfx.ce_fw->data +
4322 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4323 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4324 WREG32(mmCP_CE_UCODE_ADDR, 0);
4325 for (i = 0; i < fw_size; i++)
4326 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4327 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
/* ME */
4330 fw_data = (const __le32 *)
4331 (adev->gfx.me_fw->data +
4332 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4333 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4334 WREG32(mmCP_ME_RAM_WADDR, 0);
4335 for (i = 0; i < fw_size; i++)
4336 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4337 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
/* Compute the size, in dwords, of the PM4 stream that
 * gfx_v8_0_cp_gfx_start() emits for the clear-state setup, by walking
 * the same vi_cs_data section table.
 */
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	/* begin clear state */
	/* context control state */
	/* each extent costs a 2-dword packet header plus its registers */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	/* end clear state */
/* Start the gfx CP: program basic CP state, un-halt the engines, and
 * submit the clear-state / context-control preamble plus the CE
 * partition setup on gfx ring 0.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);
	gfx_v8_0_cp_gfx_enable(adev, true);
	/* reserve space for the clear-state stream plus the trailing
	 * SET_BASE packet (+4 dwords) */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);
	/* emit every SECT_CONTEXT extent from the golden clear-state table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
					PACKET3(PACKET3_SET_CONTEXT_REG,
				amdgpu_ring_write(ring,
					ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
	/* raster config comes from the harvested RB configuration */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);
	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_commit(ring);
/* Configure the CP gfx ring-buffer doorbell: offset + enable when the
 * ring uses a doorbell, and (on dGPUs) the doorbell address range.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
	/* ring has no doorbell: make sure it is disabled */
	tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
	/* the doorbell range registers below are dGPU-only */
	if (adev->flags & AMD_IS_APU)
	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			DOORBELL_RANGE_LOWER,
			AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
/* Bring up gfx ring 0: program the ring buffer registers (size, rptr/wptr,
 * write-back addresses, base), set up the doorbell, start the CP clear
 * state via gfx_v8_0_cp_gfx_start(), then ring-test.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
	struct amdgpu_ring *ring;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);
	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);
	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
	/* big-endian swap only; no effect on LE hosts */
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
	WREG32(mmCP_RB0_CNTL, tmp);
	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* re-write CNTL without RB_RPTR_WR_ENA to latch the pointers */
	WREG32(mmCP_RB0_CNTL, tmp);
	/* ring base is a 256-byte-aligned GPU address, stored >> 8 */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	r = amdgpu_ring_test_ring(ring);
		ring->ready = false;
/* Run (enable == true) or halt (enable == false) the compute micro
 * engines MEC1/MEC2; halting also marks every compute ring and the KIQ
 * ring as not ready.
 */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
	/* enable: clear both MEC halt bits */
	WREG32(mmCP_MEC_CNTL, 0);
	/* disable: halt both MECs */
	WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].ready = false;
	adev->gfx.kiq.ring.ready = false;
/* Legacy (direct MMIO) loading of the compute CP microcode into MEC1,
 * and optionally MEC2 when a separate MEC2 image was fetched.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;
	if (!adev->gfx.mec_fw)
	/* halt the MECs before rewriting their ucode memories */
	gfx_v8_0_cp_compute_enable(adev, false);
	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
	/* MEC1: reset write address, stream image, latch fw version */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;
		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
/* Tell the RLC which me/pipe/queue is the kernel interface queue (KIQ). */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
	struct amdgpu_device *adev = ring->adev;
	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	/* encode the queue location in the low byte: me[7:5] pipe[4:3] queue[2:0] */
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	/* second write after the first is intentional — presumably latches an
	 * activation bit set between the writes; confirm against the RLC spec */
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
/* Use the KIQ to map all kernel compute queues (KCQs): emit one
 * SET_RESOURCES packet describing the available queue mask, then a
 * MAP_QUEUES packet per compute ring, and wait for completion via a
 * scratch-register write-back.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	/* build the bitmask of usable MEC queues */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
		queue_mask |= (1ull << i);
	r = amdgpu_gfx_scratch_get(adev, &scratch);
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
	/* sentinel; overwritten with 0xDEADBEEF when the KIQ finishes */
	WREG32(scratch, 0xCAFEDEAD);
	/* 8 dwords of MAP_QUEUES per ring + 8 for SET_RESOURCES + 3 for the
	 * completion write */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);
	/* poll the scratch register until the KIQ has processed the stream */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
	amdgpu_gfx_scratch_free(adev, scratch);
/* Deactivate the currently-selected HQD (caller must hold srbm_mutex and
 * have selected the target me/pipe/queue): issue a dequeue request of
 * type @req, wait for ACTIVE to clear, then reset the request and the
 * queue read/write pointers. Times out after adev->usec_timeout polls.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
		if (i == adev->usec_timeout)
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(mmCP_HQD_PQ_RPTR, 0);
		WREG32(mmCP_HQD_PQ_WPTR, 0);
/* Fill in the memory queue descriptor (vi_mqd) for @ring in CPU memory.
 * This only populates ring->mqd_ptr; the values are pushed to the HQD
 * registers later by gfx_v8_0_mqd_commit() (or by the KIQ via
 * MAP_QUEUES). Caller has already srbm-selected the target queue.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	/* static MQD header/compute-dispatch defaults */
	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* dynamic CU mask lives in the same allocation, right after the MQD */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
			+ offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
			+ offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	/* EOP buffer base is 256-byte aligned, stored >> 8 */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
	mqd->cp_hqd_eop_control = tmp;
	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			CP_HQD_PQ_DOORBELL_CONTROL,
			ring->use_doorbell ? 1 : 0);
	mqd->cp_hqd_pq_doorbell_control = tmp;
	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;
	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			(order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
	/* big-endian swap only; no effect on LE hosts */
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;
	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;
	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
	mqd->cp_hqd_pq_doorbell_control = tmp;
	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;
	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;
	/* MTYPE=3 (uncached) on the IB/timer/ctx-save paths */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;
	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;
	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;
	/* snapshot the remaining HQD registers into the MQD unchanged */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
	/* activate the queue */
	mqd->cp_hqd_active = 1;
/* Write a prepared MQD out to the HQD registers of the currently
 * srbm-selected queue, in the register order required to activate it
 * (CP_HQD_ACTIVE written last). Non-static: also used by amdkfd.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;
	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
/* Initialize (or, after a GPU reset, restore) the KIQ's MQD and commit
 * it directly to the hardware. The KIQ uses the backup slot at index
 * AMDGPU_MAX_COMPUTE_RINGS, past the regular compute-ring slots.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
	gfx_v8_0_kiq_setting(ring);
	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		amdgpu_ring_clear_ring(ring);
		/* re-commit the restored MQD under srbm selection; no
		 * re-init needed since the backup is already populated */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	/* first-time init: zero the allocation, enable all CUs/RBs,
	 * build the MQD and commit it */
	memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
	((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
	((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	gfx_v8_0_mqd_init(ring);
	gfx_v8_0_mqd_commit(adev, mqd);
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	/* keep a CPU-side backup so a GPU reset can restore it */
	if (adev->gfx.mec.mqd_backup[mqd_idx])
		memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
/* Initialize (first boot), restore (GPU reset) or just clear (resume
 * from suspend) the MQD of one kernel compute queue. Unlike the KIQ,
 * KCQs are not committed here — the KIQ maps them via MAP_QUEUES in
 * gfx_v8_0_kiq_kcq_enable().
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];
	if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
		/* first-time init: zero the allocation and build the MQD */
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
		/* keep a CPU-side backup so a GPU reset can restore it */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		amdgpu_ring_clear_ring(ring);
	/* resume-from-suspend path: only the ring needs clearing */
	amdgpu_ring_clear_ring(ring);
/* Program the MEC doorbell aperture (KIQ..MEC ring 7) and globally
 * enable compute doorbells. Range registers are skipped on Tonga and
 * older ASICs.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
/* Bring up the compute side: enable the MECs, init/commit the KIQ MQD,
 * init every KCQ MQD, program the doorbell range, have the KIQ map the
 * KCQs, then ring-test the KIQ and each compute ring.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
	struct amdgpu_ring *ring = NULL;
	gfx_v8_0_cp_compute_enable(adev, true);
	ring = &adev->gfx.kiq.ring;
	/* map the KIQ MQD BO while building/committing its contents */
	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	r = gfx_v8_0_kiq_init_queue(ring);
	amdgpu_bo_kunmap(ring->mqd_obj);
	ring->mqd_ptr = NULL;
	amdgpu_bo_unreserve(ring->mqd_obj);
	/* same map/init/unmap dance for every kernel compute queue */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		r = gfx_v8_0_kcq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
		amdgpu_bo_unreserve(ring->mqd_obj);
	gfx_v8_0_set_mec_doorbell_range(adev);
	/* let the KIQ map all KCQs */
	r = gfx_v8_0_kiq_kcq_enable(adev);
	/* verify the KIQ itself is alive */
	ring = &adev->gfx.kiq.ring;
	r = amdgpu_ring_test_ring(ring);
		ring->ready = false;
	/* then every compute ring */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		r = amdgpu_ring_test_ring(ring);
			ring->ready = false;
/* Resume the whole command processor: load gfx + compute microcode
 * (legacy path only), bring up the gfx ring, then the KIQ/compute rings,
 * and finally re-enable the GUI idle interrupt.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
	/* dGPU only: quiesce the GUI idle interrupt during bring-up */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy firmware loading */
		r = gfx_v8_0_cp_gfx_load_microcode(adev);
		r = gfx_v8_0_cp_compute_load_microcode(adev);
	r = gfx_v8_0_cp_gfx_resume(adev);
	r = gfx_v8_0_kiq_resume(adev);
	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/* Enable or halt both halves of the command processor (gfx + compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
/* IP-block hw_init callback: apply golden register settings, init the
 * GPU config, then resume the RLC and the CP in that order.
 */
static int gfx_v8_0_hw_init(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);
	/* RLC must be running before the CP is brought up */
	r = gfx_v8_0_rlc_resume(adev);
	r = gfx_v8_0_cp_resume(adev);
/* Ask the KIQ to unmap (reset) one kernel compute queue via an
 * UNMAP_QUEUES packet, and wait for completion through a scratch
 * register write-back (0xCAFEDEAD -> 0xDEADBEEF).
 */
static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t scratch, tmp = 0;
	r = amdgpu_gfx_scratch_get(adev, &scratch);
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
	/* sentinel; overwritten when the KIQ has processed the stream */
	WREG32(scratch, 0xCAFEDEAD);
	/* 6 dwords UNMAP_QUEUES + 3 dwords completion write + headroom */
	r = amdgpu_ring_alloc(kiq_ring, 10);
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
			PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
			PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);
	/* poll until the sentinel is replaced or we time out */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
	amdgpu_gfx_scratch_free(adev, scratch);
/* IP-block hw_fini callback: mask GFX interrupts, unmap all KCQs through
 * the KIQ, then (bare-metal only) halt the CP and the RLC and ungate GFX
 * powergating. Under SR-IOV the host owns the hardware, so nothing past
 * the KCQ disable is touched.
 */
static int gfx_v8_0_hw_fini(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
	/* disable KCQ to avoid CPC touch memory not valid anymore */
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	amdgpu_device_ip_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX,
			AMD_PG_STATE_UNGATE);
/* IP-block suspend callback: flag the suspend (so KCQ re-init on resume
 * takes the ring-clear path) and tear down the hardware.
 */
static int gfx_v8_0_suspend(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	adev->gfx.in_suspend = true;
	return gfx_v8_0_hw_fini(adev);
/* IP-block resume callback: re-run hw_init, then clear the suspend flag. */
static int gfx_v8_0_resume(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	r = gfx_v8_0_hw_init(adev);
	adev->gfx.in_suspend = false;
5189 static bool gfx_v8_0_is_idle(void *handle)
5191 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5193 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
/* IP-block wait_for_idle callback: poll gfx_v8_0_is_idle() up to
 * adev->usec_timeout times; returns 0 once idle.
 */
static int gfx_v8_0_wait_for_idle(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(handle))
/* IP-block check_soft_reset callback: inspect GRBM/SRBM status registers
 * and compose the GRBM/SRBM soft-reset masks needed to recover the busy
 * units. The masks are cached in adev->gfx for the pre/soft/post reset
 * callbacks; returns whether any reset is required.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	/* any busy gfx pipeline unit => reset CP + GFX + GRBM */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	/* RLC busy => reset RLC */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	/* any CP micro engine busy => reset CP fetcher/compute/gfx + GRBM */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	/* pending GRBM requests or busy semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
	/* cache the masks for the reset-phase callbacks */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
	adev->gfx.grbm_soft_reset = 0;
	adev->gfx.srbm_soft_reset = 0;
/* IP-block pre_soft_reset callback: before asserting the cached soft
 * resets, stop the RLC, halt the gfx CP if it is being reset, and for a
 * compute reset dequeue every KCQ's HQD and halt the MECs.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	/* nothing to do when check_soft_reset found no busy units */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;
	gfx_v8_0_rlc_stop(adev);
	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);
	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		/* drain each compute HQD (req 2 = drain-pipe dequeue) under
		 * srbm selection before halting the MECs */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
/* IP-block soft_reset callback: pulse the cached GRBM/SRBM soft-reset
 * bits. GFX memory is stalled via GMCON_DEBUG around the whole sequence;
 * each reset register is written set-then-clear with a read-back to post
 * the write.
 */
static int gfx_v8_0_soft_reset(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;
	/* stall GFX memory traffic during the reset */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
	if (grbm_soft_reset) {
		/* assert, read back to post, then deassert */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	if (srbm_soft_reset) {
		/* same pulse for the SRBM-side units */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	/* release the GFX memory stall */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	/* Wait a little for things to settle down */
/* IP-block post_soft_reset callback: after the soft reset, bring back
 * whatever was reset — the gfx CP ring, and/or (after deactivating any
 * stale HQDs) the KIQ/compute queues — then restart the RLC.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;
	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);
	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		/* make sure no HQD is left active before re-initializing */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		gfx_v8_0_kiq_resume(adev);
	gfx_v8_0_rlc_start(adev);
5418  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5420  * @adev: amdgpu_device pointer
5422  * Fetches a GPU clock counter snapshot.
5423  * Returns the 64 bit clock counter snapshot.
5425 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
	/* serialize against other users of the RLC capture registers */
5429 	mutex_lock(&adev->gfx.gpu_clock_mutex);
	/* writing 1 latches the 64-bit counter into the LSB/MSB registers */
5430 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5431 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5432 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5433 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
/* Emit ring packets that program the per-VMID GDS (global data share),
 * GWS (global wave sync) and OA (ordered append) allocation registers.
 * All four writes use WRITE_DATA with ENGINE_SEL(0)/DST_SEL(0), targeting
 * the register offsets looked up in amdgpu_gds_reg_offset[vmid]. */
5437 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5439 					  uint32_t gds_base, uint32_t gds_size,
5440 					  uint32_t gws_base, uint32_t gws_size,
5441 					  uint32_t oa_base, uint32_t oa_size)
	/* convert byte quantities into the hardware's allocation units */
5443 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5444 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5446 	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5447 	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5449 	oa_base = oa_base >> AMDGPU_OA_SHIFT;
5450 	oa_size = oa_size >> AMDGPU_OA_SHIFT;
	/* GDS base */
5453 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5454 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5455 				WRITE_DATA_DST_SEL(0)));
5456 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5457 	amdgpu_ring_write(ring, 0);
5458 	amdgpu_ring_write(ring, gds_base);
	/* GDS size */
5461 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5462 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5463 				WRITE_DATA_DST_SEL(0)));
5464 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5465 	amdgpu_ring_write(ring, 0);
5466 	amdgpu_ring_write(ring, gds_size);
	/* GWS: size and base packed into one register value */
5469 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5470 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5471 				WRITE_DATA_DST_SEL(0)));
5472 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5473 	amdgpu_ring_write(ring, 0);
5474 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
	/* OA: build a contiguous bitmask of oa_size bits starting at oa_base */
5477 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5478 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5479 				WRITE_DATA_DST_SEL(0)));
5480 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5481 	amdgpu_ring_write(ring, 0);
5482 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
/* Read one indirect SQ wave register: program SQ_IND_INDEX with the
 * wave/SIMD/register address (FORCE_READ set), then read SQ_IND_DATA.
 * Caller is expected to hold whatever GRBM/SE selection is required. */
5485 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5487 	WREG32(mmSQ_IND_INDEX,
5488 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5489 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5490 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5491 		(SQ_IND_INDEX__FORCE_READ_MASK));
5492 	return RREG32(mmSQ_IND_DATA);
/* Bulk-read `num` consecutive SQ indirect registers for one wave/thread
 * into `out`.  AUTO_INCR makes repeated SQ_IND_DATA reads walk the index
 * forward so SQ_IND_INDEX is programmed only once.
 * NOTE(review): the loop header iterating `num` times over the
 * SQ_IND_DATA read appears to have been lost in extraction — confirm
 * against the upstream file. */
5495 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5496 			   uint32_t wave, uint32_t thread,
5497 			   uint32_t regno, uint32_t num, uint32_t *out)
5499 	WREG32(mmSQ_IND_INDEX,
5500 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5501 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5502 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5503 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5504 		(SQ_IND_INDEX__FORCE_READ_MASK) |
5505 		(SQ_IND_INDEX__AUTO_INCR_MASK));
5507 		*(out++) = RREG32(mmSQ_IND_DATA);
/* Dump the standard ("type 0") wave state block for one wave into dst[],
 * advancing *no_fields for each entry.  Entry 0 is a version/format
 * placeholder (0); the rest are SQ_WAVE_* indirect registers in a fixed
 * order that debug tooling (umr) relies on — do not reorder. */
5510 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5512 	/* type 0 wave data */
5513 	dst[(*no_fields)++] = 0;
5514 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5515 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5516 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5517 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5518 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5519 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5520 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5521 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5522 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5523 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5524 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5525 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5526 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5527 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5528 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5529 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5530 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5531 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
/* Read `size` SGPRs of one wave (starting at `start`) into dst[] via the
 * auto-incrementing SQ indirect interface; thread id 0 (SGPRs are
 * per-wave, not per-thread). */
5534 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5535 				     uint32_t wave, uint32_t start,
5536 				     uint32_t size, uint32_t *dst)
5539 		adev, simd, wave, 0,
5540 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
/* GFX IP callback table exported via adev->gfx.funcs (clock counter,
 * SE/SH selection, wave debug readers, me/pipe/queue selection). */
5544 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5545 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5546 	.select_se_sh = &gfx_v8_0_select_se_sh,
5547 	.read_wave_data = &gfx_v8_0_read_wave_data,
5548 	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5549 	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
/* amd_ip_funcs early_init hook: set ring counts and install the
 * GFX/ring/IRQ/GDS/RLC callback tables before sw_init runs. */
5552 static int gfx_v8_0_early_init(void *handle)
5554 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5556 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5557 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5558 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5559 	gfx_v8_0_set_ring_funcs(adev);
5560 	gfx_v8_0_set_irq_funcs(adev);
5561 	gfx_v8_0_set_gds_init(adev);
5562 	gfx_v8_0_set_rlc_funcs(adev);
/* amd_ip_funcs late_init hook: enable the privileged-register,
 * privileged-instruction, EDC error and SQ interrupts, and run the EDC
 * GPR workaround (needs the IB pool, hence late init). */
5567 static int gfx_v8_0_late_init(void *handle)
5569 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5572 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5576 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5580 	/* requires IBs so do in late init after IB pool is initialized */
5581 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5585 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5587 		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5591 	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5594 			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
/* Toggle static per-CU medium-grain power gating.  On Polaris11/12 and
 * VegaM the request is routed through the SMU (powerplay); the RLC_PG_CNTL
 * field is programmed in either case. */
5602 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5605 	if (((adev->asic_type == CHIP_POLARIS11) ||
5606 	    (adev->asic_type == CHIP_POLARIS12) ||
5607 	    (adev->asic_type == CHIP_VEGAM)) &&
5608 	    adev->powerplay.pp_funcs->set_powergating_by_smu)
5609 		/* Send msg to SMU via Powerplay */
5610 		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5612 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Toggle dynamic per-CU medium-grain power gating via RLC_PG_CNTL. */
5615 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5618 	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Toggle "quick" medium-grain power gating (Polaris11 family). */
5621 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5624 	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
/* Toggle coarse-grain GFX power gating (Carrizo/Stoney path). */
5627 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5630 	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
/* Toggle GFX pipeline power gating, then touch a GFX register so the
 * block wakes up and latches the new setting. */
5633 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5636 	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5638 	/* Read any GFX register to wake up GFX. */
5640 	RREG32(mmDB_RENDER_CONTROL);
/* Enable coarse-grain (and, if supported, pipeline) GFX power gating when
 * requested and the PG flags allow it; otherwise disable both. */
5643 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5646 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5647 		cz_enable_gfx_cg_power_gating(adev, true);
5648 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5649 			cz_enable_gfx_pipeline_power_gating(adev, true);
5651 		cz_enable_gfx_cg_power_gating(adev, false);
5652 		cz_enable_gfx_pipeline_power_gating(adev, false);
/* amd_ip_funcs set_powergating_state hook: apply the requested PG state
 * per ASIC.  The first (APU) branch also handles SCK slow-down and CP
 * power gating; the Polaris branch adds quick-MG gating.  Skipped
 * entirely under SR-IOV since the host owns power gating.
 * NOTE(review): case labels for the first branch fall outside the
 * extracted lines — confirm which ASICs it covers upstream. */
5656 static int gfx_v8_0_set_powergating_state(void *handle,
5657 					  enum amd_powergating_state state)
5659 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5660 	bool enable = (state == AMD_PG_STATE_GATE);
5662 	if (amdgpu_sriov_vf(adev))
5665 	switch (adev->asic_type) {
5669 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5670 			cz_enable_sck_slow_down_on_power_up(adev, true);
5671 			cz_enable_sck_slow_down_on_power_down(adev, true);
5673 			cz_enable_sck_slow_down_on_power_up(adev, false);
5674 			cz_enable_sck_slow_down_on_power_down(adev, false);
5676 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5677 			cz_enable_cp_power_gating(adev, true);
5679 			cz_enable_cp_power_gating(adev, false);
5681 		cz_update_gfx_cg_power_gating(adev, enable);
5683 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5684 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5686 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5688 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5689 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5691 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5693 	case CHIP_POLARIS11:
5694 	case CHIP_POLARIS12:
5696 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5697 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5699 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5701 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5702 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5704 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5706 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5707 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5709 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
/* amd_ip_funcs get_clockgating_state hook: report which CG features are
 * currently active by inspecting the override/enable bits in hardware and
 * OR-ing the corresponding AMD_CG_SUPPORT_* flags into *flags.
 * No-op under SR-IOV (registers are owned by the host). */
5718 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5720 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5723 	if (amdgpu_sriov_vf(adev))
5726 	/* AMD_CG_SUPPORT_GFX_MGCG */
5727 	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5728 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5729 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5731 	/* AMD_CG_SUPPORT_GFX_CGCG */
5732 	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5733 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5734 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5736 	/* AMD_CG_SUPPORT_GFX_CGLS */
5737 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5738 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5740 	/* AMD_CG_SUPPORT_GFX_CGTS */
5741 	data = RREG32(mmCGTS_SM_CTRL_REG);
5742 	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5743 		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5745 	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5746 	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5747 		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5749 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5750 	data = RREG32(mmRLC_MEM_SLP_CNTL);
5751 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5752 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5754 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5755 	data = RREG32(mmCP_MEM_SLP_CNTL);
5756 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5757 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
/* Broadcast a BPM command over the RLC serdes bus to all CU and non-CU
 * masters: select all SEs/SHs, set both master masks to all-ones, clear
 * the command/select fields of RLC_SERDES_WR_CTRL, then program the new
 * cmd/reg_addr.  Stoney masks a smaller field set (no BPM_DATA/REG_ADDR
 * clear) than the other ASICs. */
5760 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5761 				     uint32_t reg_addr, uint32_t cmd)
	/* broadcast to every shader engine / shader array */
5765 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5767 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5768 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5770 	data = RREG32(mmRLC_SERDES_WR_CTRL);
5771 	if (adev->asic_type == CHIP_STONEY)
5772 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5773 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5774 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5775 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5776 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5777 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5778 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5779 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5780 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5782 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5783 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5784 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5785 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5786 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5787 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5788 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5789 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5790 			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5791 			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5792 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* install the command, target register and broadcast BPM address */
5793 	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5794 		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5795 		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5796 		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5798 	WREG32(mmRLC_SERDES_WR_CTRL, data);
/* RLC safe-mode handshake: message values and the REQ/MESSAGE field
 * layout of the (locally defined) RLC_GPR_REG2 register. */
5801 #define MSG_ENTER_RLC_SAFE_MODE     1
5802 #define MSG_EXIT_RLC_SAFE_MODE      0
5803 #define RLC_GPR_REG2__REQ_MASK           0x00000001
5804 #define RLC_GPR_REG2__REQ__SHIFT         0
5805 #define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
5806 #define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
/* Ask the RLC firmware to enter safe mode (so CG state can be changed
 * without racing the RLC).  No-op when the RLC is halted; only performed
 * when CGCG/MGCG is enabled.  Waits for GFX clock/power status and for
 * the CMD bit to self-clear, then records in_safe_mode. */
5808 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5813 	data = RREG32(mmRLC_CNTL);
	/* RLC not running -> nothing to hand-shake with */
5814 	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5817 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
	/* CMD=1 + MESSAGE=1 requests safe-mode entry */
5818 		data |= RLC_SAFE_MODE__CMD_MASK;
5819 		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5820 		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5821 		WREG32(mmRLC_SAFE_MODE, data);
5823 		for (i = 0; i < adev->usec_timeout; i++) {
5824 			if ((RREG32(mmRLC_GPM_STAT) &
5825 			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5826 			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5827 			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5828 			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
	/* wait for the RLC to acknowledge by clearing CMD */
5833 		for (i = 0; i < adev->usec_timeout; i++) {
5834 			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5838 		adev->gfx.rlc.in_safe_mode = true;
/* Counterpart of iceland_enter_rlc_safe_mode: request safe-mode exit
 * (CMD=1, MESSAGE=0) when we previously entered it, clear the
 * in_safe_mode flag and wait for the RLC to clear CMD. */
5842 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5847 	data = RREG32(mmRLC_CNTL);
5848 	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5851 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5852 		if (adev->gfx.rlc.in_safe_mode) {
5853 			data |= RLC_SAFE_MODE__CMD_MASK;
5854 			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5855 			WREG32(mmRLC_SAFE_MODE, data);
5856 			adev->gfx.rlc.in_safe_mode = false;
5860 	for (i = 0; i < adev->usec_timeout; i++) {
5861 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
/* RLC safe-mode callbacks installed via gfx_v8_0_set_rlc_funcs(). */
5867 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5868 	.enter_safe_mode = iceland_enter_rlc_safe_mode,
5869 	.exit_safe_mode = iceland_exit_rlc_safe_mode
/* Enable/disable medium-grain clock gating (MGCG), memory light sleep
 * (MGLS, for RLC and CP) and CGTS tree-shade gating, in the numbered
 * sequence required by the hardware.  The whole sequence runs inside RLC
 * safe mode and is fenced by serdes-idle waits; the serdes BPM command
 * clears/sets the MGCG override in the per-CU BPM units. */
5872 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5875 	uint32_t temp, data;
5877 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5879 	/* It is disabled by HW by default */
5880 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5881 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5882 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5883 				/* 1 - RLC memory Light sleep */
5884 				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5886 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5887 				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5890 		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5891 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override set; dGPUs clear it too */
5892 		if (adev->flags & AMD_IS_APU)
5893 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5894 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5895 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5897 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5898 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5899 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5900 				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5903 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5905 		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5906 		gfx_v8_0_wait_for_rlc_serdes(adev);
5908 		/* 5 - clear mgcg override */
5909 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5911 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5912 			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5913 			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5914 			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5915 			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5916 			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5917 			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5918 			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5919 			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5920 				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5921 			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5922 			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5924 				WREG32(mmCGTS_SM_CTRL_REG, data);
5928 		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5929 		gfx_v8_0_wait_for_rlc_serdes(adev);
5931 		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5932 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5933 		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5934 			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5935 			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5936 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5938 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5940 		/* 2 - disable MGLS in RLC */
5941 		data = RREG32(mmRLC_MEM_SLP_CNTL);
5942 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5943 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5944 			WREG32(mmRLC_MEM_SLP_CNTL, data);
5947 		/* 3 - disable MGLS in CP */
5948 		data = RREG32(mmCP_MEM_SLP_CNTL);
5949 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5950 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5951 			WREG32(mmCP_MEM_SLP_CNTL, data);
5954 		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5955 		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5956 		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5957 				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5959 			WREG32(mmCGTS_SM_CTRL_REG, data);
5961 		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5962 		gfx_v8_0_wait_for_rlc_serdes(adev);
5964 		/* 6 - set mgcg override */
5965 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5969 		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5970 		gfx_v8_0_wait_for_rlc_serdes(adev);
5973 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
/* Enable/disable coarse-grain clock gating (CGCG) and coarse-grain light
 * sleep (CGLS).  Like the MGCG path this runs inside RLC safe mode, with
 * serdes-idle fences around each BPM override/enable command, and ties
 * the GUI-idle interrupt enable to the CGCG state. */
5976 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5979 	uint32_t temp, temp1, data, data1;
5981 	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5983 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5985 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
	/* 1 - drop the CGCG override so the RLC controls gating */
5986 		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5987 		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5989 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5991 		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5992 		gfx_v8_0_wait_for_rlc_serdes(adev);
5994 		/* 2 - clear cgcg override */
5995 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5997 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5998 		gfx_v8_0_wait_for_rlc_serdes(adev);
6000 		/* 3 - write cmd to set CGLS */
6001 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
6003 		/* 4 - enable cgcg */
6004 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
6006 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6008 			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6010 			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6011 			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6014 				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6016 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6020 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6022 		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
6023 		 * Cmp_busy/GFX_Idle interrupts
6025 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6027 		/* disable cntx_empty_int_enable & GFX Idle interrupt */
6028 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	/* disable path: force both overrides back on first */
6031 		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6032 		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
6033 				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
6035 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6037 		/* read gfx register to wake up cgcg */
6038 		RREG32(mmCB_CGTT_SCLK_CTRL);
6039 		RREG32(mmCB_CGTT_SCLK_CTRL);
6040 		RREG32(mmCB_CGTT_SCLK_CTRL);
6041 		RREG32(mmCB_CGTT_SCLK_CTRL);
6043 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6044 		gfx_v8_0_wait_for_rlc_serdes(adev);
6046 		/* write cmd to Set CGCG Override */
6047 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6049 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6050 		gfx_v8_0_wait_for_rlc_serdes(adev);
6052 		/* write cmd to Clear CGLS */
6053 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6055 		/* disable cgcg, cgls should be disabled too. */
6056 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
6057 			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6059 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6060 		/* enable interrupts again for PG */
6061 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6064 	gfx_v8_0_wait_for_rlc_serdes(adev);
6066 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
/* Apply MGCG and CGCG in the hardware-required order: when enabling,
 * medium-grain gating must be programmed before coarse-grain; when
 * disabling, coarse-grain must be torn down first. */
6068 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6072 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6073 		 * ===  MGCG + MGLS + TS(CG/LS) ===
6075 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6076 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6078 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6079 		 * ===  CGCG + CGLS ===
6081 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6082 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
/* Tonga clock-gating path: instead of banging registers directly, build
 * PP_CG_MSG_ID requests for the CGCG/CGLS and MGCG/MGLS groups and hand
 * them to the SMU via powerplay.  pp_state is cleared when ungating
 * (visible only partially here due to dropped lines). */
6087 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6088 					  enum amd_clockgating_state state)
6090 	uint32_t msg_id, pp_state = 0;
6091 	uint32_t pp_support_state = 0;
6093 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6094 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6095 			pp_support_state = PP_STATE_SUPPORT_LS;
6096 			pp_state = PP_STATE_LS;
6098 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6099 			pp_support_state |= PP_STATE_SUPPORT_CG;
6100 			pp_state |= PP_STATE_CG;
6102 		if (state == AMD_CG_STATE_UNGATE)
6105 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6109 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6110 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6113 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6114 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6115 			pp_support_state = PP_STATE_SUPPORT_LS;
6116 			pp_state = PP_STATE_LS;
6119 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6120 			pp_support_state |= PP_STATE_SUPPORT_CG;
6121 			pp_state |= PP_STATE_CG;
6124 		if (state == AMD_CG_STATE_UNGATE)
6127 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6131 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6132 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
/* Polaris clock-gating path: same SMU-message scheme as the Tonga
 * variant, extended with the 3D CGCG/CGLS group plus the RLC and CP
 * memory-light-sleep groups. */
6138 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6139 					  enum amd_clockgating_state state)
6142 	uint32_t msg_id, pp_state = 0;
6143 	uint32_t pp_support_state = 0;
	/* group 1: coarse-grain CG/LS */
6145 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6146 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6147 			pp_support_state = PP_STATE_SUPPORT_LS;
6148 			pp_state = PP_STATE_LS;
6150 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6151 			pp_support_state |= PP_STATE_SUPPORT_CG;
6152 			pp_state |= PP_STATE_CG;
6154 		if (state == AMD_CG_STATE_UNGATE)
6157 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6161 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6162 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	/* group 2: 3D engine coarse-grain CG/LS */
6165 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6166 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6167 			pp_support_state = PP_STATE_SUPPORT_LS;
6168 			pp_state = PP_STATE_LS;
6170 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6171 			pp_support_state |= PP_STATE_SUPPORT_CG;
6172 			pp_state |= PP_STATE_CG;
6174 		if (state == AMD_CG_STATE_UNGATE)
6177 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6181 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6182 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	/* group 3: medium-grain CG/LS */
6185 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6186 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6187 			pp_support_state = PP_STATE_SUPPORT_LS;
6188 			pp_state = PP_STATE_LS;
6191 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6192 			pp_support_state |= PP_STATE_SUPPORT_CG;
6193 			pp_state |= PP_STATE_CG;
6196 		if (state == AMD_CG_STATE_UNGATE)
6199 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6203 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6204 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	/* group 4: RLC memory light sleep */
6207 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6208 		pp_support_state = PP_STATE_SUPPORT_LS;
6210 		if (state == AMD_CG_STATE_UNGATE)
6213 			pp_state = PP_STATE_LS;
6215 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6219 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6220 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	/* group 5: CP memory light sleep */
6223 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6224 		pp_support_state = PP_STATE_SUPPORT_LS;
6226 		if (state == AMD_CG_STATE_UNGATE)
6229 			pp_state = PP_STATE_LS;
6230 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6234 		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6235 			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
/* amd_ip_funcs set_clockgating_state hook: dispatch per ASIC — the
 * direct register path for the first group of chips, the Tonga SMU-
 * message path, and the Polaris SMU-message path.  Skipped under SR-IOV. */
6241 static int gfx_v8_0_set_clockgating_state(void *handle,
6242 					  enum amd_clockgating_state state)
6244 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6246 	if (amdgpu_sriov_vf(adev))
6249 	switch (adev->asic_type) {
6253 		gfx_v8_0_update_gfx_clock_gating(adev,
6254 						 state == AMD_CG_STATE_GATE);
6257 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6259 	case CHIP_POLARIS10:
6260 	case CHIP_POLARIS11:
6261 	case CHIP_POLARIS12:
6263 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
/* Ring read pointer comes from the writeback slot the CP updates. */
6271 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6273 	return ring->adev->wb.wb[ring->rptr_offs];
/* GFX ring write pointer: from the writeback slot when the ring uses a
 * doorbell, otherwise read back from the CP_RB0_WPTR register. */
6276 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6278 	struct amdgpu_device *adev = ring->adev;
6280 	if (ring->use_doorbell)
6281 		/* XXX check if swapping is necessary on BE */
6282 		return ring->adev->wb.wb[ring->wptr_offs];
6284 		return RREG32(mmCP_RB0_WPTR);
/* Publish the GFX ring write pointer: via writeback + doorbell when
 * doorbells are in use, otherwise by writing CP_RB0_WPTR (with a read-
 * back to flush the register write). */
6287 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6289 	struct amdgpu_device *adev = ring->adev;
6291 	if (ring->use_doorbell) {
6292 		/* XXX check if swapping is necessary on BE */
6293 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6294 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6296 		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6297 		(void)RREG32(mmCP_RB0_WPTR);
/* Emit an HDP flush: pick the GPU_HDP_FLUSH_DONE bit for this engine
 * (CP2+pipe / CP6+pipe for compute/KIQ, CP0 + PFP engine for gfx), then
 * emit a WAIT_REG_MEM that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the ref/mask bit matches. */
6301 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6303 	u32 ref_and_mask, reg_mem_engine;
6305 	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6306 	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6309 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6312 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6319 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6320 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6323 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6324 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6325 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6327 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6328 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6329 	amdgpu_ring_write(ring, ref_and_mask);
6330 	amdgpu_ring_write(ring, ref_and_mask);
6331 	amdgpu_ring_write(ring, 0x20); /* poll interval */
/* Emit a VGT flush: VS_PARTIAL_FLUSH event followed by VGT_FLUSH. */
6334 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6336 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6337 	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6340 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6341 	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
/* Emit an indirect buffer on the GFX ring.  CE IBs use the _CONST packet.
 * Under SR-IOV, preemptible IBs get INDIRECT_BUFFER_PRE_ENB and DE IBs
 * are preceded by de-meta state (for mid-command-buffer preemption). */
6345 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6346 				      struct amdgpu_ib *ib,
6347 				      unsigned vmid, bool ctx_switch)
6349 	u32 header, control = 0;
6351 	if (ib->flags & AMDGPU_IB_FLAG_CE)
6352 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6354 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6356 	control |= ib->length_dw | (vmid << 24);
6358 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6359 		control |= INDIRECT_BUFFER_PRE_ENB(1);
6361 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6362 			gfx_v8_0_ring_emit_de_meta(ring);
6365 	amdgpu_ring_write(ring, header);
6366 	amdgpu_ring_write(ring,
	/* IB address must be 4-byte aligned; low 2 bits masked off */
6370 			  (ib->gpu_addr & 0xFFFFFFFC));
6371 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6372 	amdgpu_ring_write(ring, control);
/* Emit an indirect buffer on a compute ring: plain INDIRECT_BUFFER with
 * VALID bit, dword length and VMID packed into the control word. */
6375 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6376 					  struct amdgpu_ib *ib,
6377 					  unsigned vmid, bool ctx_switch)
6379 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6381 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6382 	amdgpu_ring_write(ring,
6386 				(ib->gpu_addr & 0xFFFFFFFC));
6387 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6388 	amdgpu_ring_write(ring, control);
/* Emit a GFX fence: EVENT_WRITE_EOP that flushes caches then writes the
 * seq value (32- or 64-bit per flags) to `addr`, optionally raising an
 * interrupt when AMDGPU_FENCE_FLAG_INT is set. */
6391 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6392 					 u64 seq, unsigned flags)
6394 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6395 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6397 	/* EVENT_WRITE_EOP - flush caches, send int */
6398 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6399 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6401 				 EOP_TC_WB_ACTION_EN |
6402 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6404 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6405 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6406 			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6407 	amdgpu_ring_write(ring, lower_32_bits(seq));
6408 	amdgpu_ring_write(ring, upper_32_bits(seq));
/* Emit a pipeline sync: WAIT_REG_MEM polling the ring's fence memory
 * until it equals the latest sync_seq.  GFX rings wait on the PFP engine,
 * compute rings on ME. */
6412 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6414 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6415 	uint32_t seq = ring->fence_drv.sync_seq;
6416 	uint64_t addr = ring->fence_drv.gpu_addr;
6418 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6419 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6420 				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6421 				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6422 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6423 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6424 	amdgpu_ring_write(ring, seq);
6425 	amdgpu_ring_write(ring, 0xffffffff);
6426 	amdgpu_ring_write(ring, 4); /* poll interval */
/* Emit a VM TLB flush: delegate the flush request to the GMC helper,
 * then WAIT_REG_MEM-poll VM_INVALIDATE_REQUEST until the invalidate has
 * completed; on GFX rings additionally sync PFP to ME so the PFP does
 * not prefetch through stale translations. */
6429 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6430 					unsigned vmid, uint64_t pd_addr)
6432 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6434 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6436 	/* wait for the invalidate to complete */
6437 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6438 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6439 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6440 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6441 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6442 	amdgpu_ring_write(ring, 0);
6443 	amdgpu_ring_write(ring, 0); /* ref */
6444 	amdgpu_ring_write(ring, 0); /* mask */
6445 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6447 	/* compute doesn't have PFP */
6449 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6450 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6451 		amdgpu_ring_write(ring, 0x0);
/* Compute ring write pointer always lives in the writeback slot. */
6455 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6457 	return ring->adev->wb.wb[ring->wptr_offs];
/* Publish a compute ring's write pointer: update the writeback slot and
 * ring the doorbell (compute rings always use doorbells on gfx v8). */
6460 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6462 	struct amdgpu_device *adev = ring->adev;
6464 	/* XXX check if swapping is necessary on BE */
6465 	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6466 	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
/* Set a pipe's SPI wave-launch percentage: full quota when `acquire`,
 * minimal (0x1) otherwise — used to throttle pipes that lost a resource
 * reservation.  pipe_num indexes past the GFX/HP3D entries of me 0. */
6469 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6472 	struct amdgpu_device *adev = ring->adev;
6473 	int pipe_num, tmp, reg;
6474 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6476 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6478 	/* first me only has 2 entries, GFX and HP3D */
6482 	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6484 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
/*
 * Track pipe reservations for high-priority compute rings under
 * pipe_reserve_mutex.  On acquire the ring's pipe bit is set in
 * pipe_reserve_bitmap; on release it is cleared.  If no reservations
 * remain, every GFX and compute ring gets its full pipe percentage back;
 * otherwise all pipes without a current reservation are throttled via
 * gfx_v8_0_ring_set_pipe_percent().
 */
6488 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6489 struct amdgpu_ring *ring,
6494 struct amdgpu_ring *iring;
6496 mutex_lock(&adev->gfx.pipe_reserve_mutex);
6497 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6499 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6501 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6503 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6504 /* Clear all reservations - everyone reacquires all resources */
6505 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6506 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6509 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6510 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6513 /* Lower all pipes without a current reservation */
6514 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6515 iring = &adev->gfx.gfx_ring[i];
6516 pipe = amdgpu_gfx_queue_to_bit(adev,
/* keep full percentage only for pipes still holding a reservation */
6520 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6521 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6524 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6525 iring = &adev->gfx.compute_ring[i];
6526 pipe = amdgpu_gfx_queue_to_bit(adev,
6530 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6531 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6535 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
/*
 * Program the hardware queue descriptor (HQD) pipe/queue priority for
 * this ring: (0x2, 0xf) when acquiring high priority, (0, 0) when
 * releasing.  SRBM banked registers require selecting the ring's
 * me/pipe/queue under srbm_mutex first.
 */
6538 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6539 struct amdgpu_ring *ring,
6542 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6543 uint32_t queue_priority = acquire ? 0xf : 0x0;
6545 mutex_lock(&adev->srbm_mutex);
6546 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6548 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6549 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
/* restore default SRBM selection before dropping the lock */
6551 vi_srbm_select(adev, 0, 0, 0, 0);
6552 mutex_unlock(&adev->srbm_mutex);
/*
 * .set_priority callback for compute rings: map the scheduler priority
 * to acquire/release of HQD priority and pipe reservations.  Only
 * DRM_SCHED_PRIORITY_HIGH_HW counts as "acquire"; non-compute rings are
 * ignored.
 */
6554 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6555 enum drm_sched_priority priority)
6557 struct amdgpu_device *adev = ring->adev;
6558 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6560 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6563 gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6564 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
/*
 * Emit a fence on a compute ring using RELEASE_MEM: flush/invalidate the
 * caches, write the 32- or 64-bit @seq to @addr, and optionally raise an
 * interrupt (AMDGPU_FENCE_FLAG_INT) when the packet retires.
 */
6567 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6571 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6572 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6574 /* RELEASE_MEM - flush caches, send int */
6575 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6576 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6578 EOP_TC_WB_ACTION_EN |
6579 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* DATA_SEL: 2 = 64-bit seq, 1 = 32-bit; INT_SEL 2 = int on write confirm */
6581 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6582 amdgpu_ring_write(ring, addr & 0xfffffffc);
6583 amdgpu_ring_write(ring, upper_32_bits(addr));
6584 amdgpu_ring_write(ring, lower_32_bits(seq));
6585 amdgpu_ring_write(ring, upper_32_bits(seq));
/*
 * Emit a fence on the KIQ ring: WRITE_DATA of the 32-bit @seq to the
 * writeback address, and, if AMDGPU_FENCE_FLAG_INT is set, a second
 * WRITE_DATA poking CPC_INT_STATUS to raise a GENERIC2 interrupt.
 * 64-bit sequence numbers are not supported on KIQ (BUG_ON).
 */
6588 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6589 u64 seq, unsigned int flags)
6591 /* we only allocate 32bit for each seq wb address */
6592 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6594 /* write fence seq to the "addr" */
6595 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6596 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6597 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6598 amdgpu_ring_write(ring, lower_32_bits(addr));
6599 amdgpu_ring_write(ring, upper_32_bits(addr));
6600 amdgpu_ring_write(ring, lower_32_bits(seq));
6602 if (flags & AMDGPU_FENCE_FLAG_INT) {
6603 /* set register to trigger INT */
6604 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6605 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6606 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6607 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6608 amdgpu_ring_write(ring, 0);
6609 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
/* Emit a SWITCH_BUFFER packet (used for CE/DE double-buffer switching). */
6613 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6615 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6616 amdgpu_ring_write(ring, 0);
/*
 * Emit a CONTEXT_CONTROL packet whose dw2 load-enable bits depend on
 * @flags: context-switch requests reload of global/per-context state
 * (preceded by a VGT flush), and preamble presence controls CE RAM
 * loading.  Under SR-IOV the CE metadata is emitted first.
 */
6619 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6623 if (amdgpu_sriov_vf(ring->adev))
6624 gfx_v8_0_ring_emit_ce_meta(ring);
6626 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6627 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6628 gfx_v8_0_ring_emit_vgt_flush(ring);
6629 /* set load_global_config & load_global_uconfig */
6631 /* set load_cs_sh_regs */
6633 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6636 /* set load_ce_ram if preamble presented */
6637 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6640 /* still load_ce_ram if this is the first time preamble presented
6641 * although there is no context switch happens.
6643 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6647 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6648 amdgpu_ring_write(ring, dw2);
6649 amdgpu_ring_write(ring, 0);
/*
 * Emit a COND_EXEC packet keyed on cond_exe_gpu_addr and return the ring
 * offset of its dword-count slot (written as the 0x55aa55aa placeholder)
 * so gfx_v8_0_ring_emit_patch_cond_exec() can patch it later.
 */
6652 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6656 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6657 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6658 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6659 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6660 ret = ring->wptr & ring->buf_mask;
6661 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
/*
 * Patch the COND_EXEC placeholder at @offset with the number of dwords
 * emitted since it, taking ring-buffer wrap-around into account (the
 * else-branch adds ring_size/4 when wptr has wrapped past @offset).
 */
6665 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6669 BUG_ON(offset > ring->buf_mask);
6670 BUG_ON(ring->ring[offset] != 0x55aa55aa);
6672 cur = (ring->wptr & ring->buf_mask) - 1;
6673 if (likely(cur > offset))
6674 ring->ring[offset] = cur - offset;
6676 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
/*
 * Emit a COPY_DATA packet that copies register @reg into the writeback
 * slot at virt.reg_val_offs, with write-confirm, so the host can read
 * the register value back (used for SR-IOV register reads via KIQ).
 */
6679 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6681 struct amdgpu_device *adev = ring->adev;
6683 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6684 amdgpu_ring_write(ring, 0 | /* src: register*/
6685 (5 << 8) | /* dst: memory */
6686 (1 << 20)); /* write confirm */
6687 amdgpu_ring_write(ring, reg);
6688 amdgpu_ring_write(ring, 0);
/* destination = WB buffer GPU address + per-device register value slot */
6689 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6690 adev->virt.reg_val_offs * 4));
6691 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6692 adev->virt.reg_val_offs * 4));
/*
 * Emit a WRITE_DATA packet writing @val to register @reg.  The engine
 * select / confirm bits in the control word depend on the ring type
 * (GFX uses ME with write-confirm, KIQ uses no-increment addressing).
 */
6695 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6700 switch (ring->funcs->type) {
6701 case AMDGPU_RING_TYPE_GFX:
6702 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6704 case AMDGPU_RING_TYPE_KIQ:
6705 cmd = 1 << 16; /* no inc addr */
6712 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6713 amdgpu_ring_write(ring, cmd);
6714 amdgpu_ring_write(ring, reg);
6715 amdgpu_ring_write(ring, 0);
6716 amdgpu_ring_write(ring, val);
/* Enable/disable the GFX ring end-of-pipe timestamp interrupt. */
6719 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6720 enum amdgpu_interrupt_state state)
6722 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6723 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/*
 * Enable/disable the EOP timestamp interrupt for one MEC pipe by
 * read-modify-writing the matching CP_ME1_PIPEn_INT_CNTL register.
 * Only MEC1 is driver-controlled; other pipes belong to amdkfd.
 */
6726 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6728 enum amdgpu_interrupt_state state)
6730 u32 mec_int_cntl, mec_int_cntl_reg;
6733 * amdgpu controls only the first MEC. That's why this function only
6734 * handles the setting of interrupts for this specific MEC. All other
6735 * pipes' interrupts are set by amdkfd.
/* select per-pipe interrupt control register */
6741 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6744 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6747 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6750 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6753 DRM_DEBUG("invalid pipe %d\n", pipe);
6757 DRM_DEBUG("invalid me %d\n", me);
6762 case AMDGPU_IRQ_STATE_DISABLE:
6763 mec_int_cntl = RREG32(mec_int_cntl_reg);
6764 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6765 WREG32(mec_int_cntl_reg, mec_int_cntl);
6767 case AMDGPU_IRQ_STATE_ENABLE:
6768 mec_int_cntl = RREG32(mec_int_cntl_reg);
6769 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6770 WREG32(mec_int_cntl_reg, mec_int_cntl);
/* irq .set callback: enable/disable the privileged-register fault interrupt. */
6777 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6778 struct amdgpu_irq_src *source,
6780 enum amdgpu_interrupt_state state)
6782 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6783 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* irq .set callback: enable/disable the privileged-instruction fault interrupt. */
6788 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6789 struct amdgpu_irq_src *source,
6791 enum amdgpu_interrupt_state state)
6793 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6794 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/*
 * irq .set callback dispatching EOP interrupt enable/disable to the GFX
 * ring or to the matching (MEC, pipe) via
 * gfx_v8_0_set_compute_eop_interrupt_state().
 */
6799 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6800 struct amdgpu_irq_src *src,
6802 enum amdgpu_interrupt_state state)
6805 case AMDGPU_CP_IRQ_GFX_EOP:
6806 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6808 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6809 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6811 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6812 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6814 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6815 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6817 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6818 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6820 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6821 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6823 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6824 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6826 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6827 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6829 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6830 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
/*
 * irq .set callback: enable/disable CP ECC error interrupts across all
 * CP interrupt control registers (global, per-ring, CPC, and every
 * MEC1/MEC2 pipe) with the same enable flag.
 */
6838 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6839 struct amdgpu_irq_src *source,
6841 enum amdgpu_interrupt_state state)
6846 case AMDGPU_IRQ_STATE_DISABLE:
6850 case AMDGPU_IRQ_STATE_ENABLE:
6858 WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6859 WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6860 WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6861 WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6862 WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6863 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6865 WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6867 WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6869 WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6871 WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6873 WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6875 WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6877 WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
/*
 * irq .set callback for SQ interrupts: toggles the STALL field in
 * SQ_INTERRUPT_MSG_CTRL according to the requested state.
 */
6883 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6884 struct amdgpu_irq_src *source,
6886 enum amdgpu_interrupt_state state)
6891 case AMDGPU_IRQ_STATE_DISABLE:
6895 case AMDGPU_IRQ_STATE_ENABLE:
6903 WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
/*
 * End-of-pipe interrupt handler: decode me/pipe/queue from the IV
 * ring_id field and run fence processing on the matching ring (GFX
 * ring for me 0, otherwise the matching compute ring).
 */
6909 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6910 struct amdgpu_irq_src *source,
6911 struct amdgpu_iv_entry *entry)
6914 u8 me_id, pipe_id, queue_id;
6915 struct amdgpu_ring *ring;
6917 DRM_DEBUG("IH: CP EOP\n");
/* ring_id layout: [3:2] = me, [1:0] = pipe, [6:4] = queue */
6918 me_id = (entry->ring_id & 0x0c) >> 2;
6919 pipe_id = (entry->ring_id & 0x03) >> 0;
6920 queue_id = (entry->ring_id & 0x70) >> 4;
6924 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6928 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6929 ring = &adev->gfx.compute_ring[i];
6930 /* Per-queue interrupt is supported for MEC starting from VI.
6931 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6933 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6934 amdgpu_fence_process(ring);
/*
 * Privileged-register fault handler: a command stream touched a
 * protected register, so log and schedule a GPU reset.
 */
6941 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6942 struct amdgpu_irq_src *source,
6943 struct amdgpu_iv_entry *entry)
6945 DRM_ERROR("Illegal register access in command stream\n");
6946 schedule_work(&adev->reset_work);
/*
 * Privileged-instruction fault handler: log the illegal instruction and
 * schedule a GPU reset to recover.
 */
6950 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6951 struct amdgpu_irq_src *source,
6952 struct amdgpu_iv_entry *entry)
6954 DRM_ERROR("Illegal instruction in command stream\n");
6955 schedule_work(&adev->reset_work);
/*
 * gfx_v8_0_cp_ecc_error_irq - CP EDC/ECC error interrupt handler.
 *
 * Log-only handler: reports that the CP detected an EDC/ECC error.
 * No recovery action is taken here.  Returns 0 (interrupt handled).
 *
 * Fix: the original message lacked the trailing newline required by
 * kernel printk convention, so it could run together with the next log
 * line.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
/*
 * Decode and report an SQ interrupt payload.  Depending on the ENCODING
 * field this is either a general-purpose ("auto") interrupt dump, or a
 * per-wave trap/EDC report for which SQ_EDC_INFO is read after selecting
 * the faulting SE/SH/CU via GRBM.  May run in ISR or BH context; the
 * EDC source register is only read when it is safe to do so.
 */
6967 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6969 u32 enc, se_id, sh_id, cu_id;
6971 int sq_edc_source = -1;
6973 enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6974 se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6978 DRM_INFO("SQ general purpose intr detected:"
6979 "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6980 "host_cmd_overflow %d, cmd_timestamp %d,"
6981 "reg_timestamp %d, thread_trace_buff_full %d,"
6982 "wlt %d, thread_trace %d.\n",
6984 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6985 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6986 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6987 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6988 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6989 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6990 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6991 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6997 cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6998 sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
7001 * This function can be called either directly from ISR
7002 * or from BH in which case we can access SQ_EDC_INFO
/* banked register: select the reporting SE/SH/CU before reading */
7006 mutex_lock(&adev->grbm_idx_mutex);
7007 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
7009 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
7011 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7012 mutex_unlock(&adev->grbm_idx_mutex);
7016 sprintf(type, "instruction intr");
7018 sprintf(type, "EDC/ECC error");
7022 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
7023 "trap %s, sq_ed_info.source %s.\n",
7024 type, se_id, sh_id, cu_id,
7025 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
7026 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
7027 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
7028 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
7029 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
/* NOTE(review): "\n." below is a typo for ".\n" — fix in a code change */
7033 DRM_ERROR("SQ invalid encoding type\n.");
/*
 * Deferred (bottom-half) SQ interrupt worker: re-parses the saved
 * ih_data where SQ_EDC_INFO can be safely accessed.
 */
7037 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
7040 struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
7041 struct sq_work *sq_work = container_of(work, struct sq_work, work);
7043 gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
/*
 * SQ interrupt ISR: prefer deferring the parse to the sq_work bottom
 * half (where SQ_EDC_INFO is readable); if the previous work item is
 * still pending, parse directly from the ISR with whatever is available.
 */
7046 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
7047 struct amdgpu_irq_src *source,
7048 struct amdgpu_iv_entry *entry)
7050 unsigned ih_data = entry->src_data[0];
7053 * Try to submit work so SQ_EDC_INFO can be accessed from
7054 * BH. If previous work submission hasn't finished yet
7055 * just print whatever info is possible directly from the ISR.
7057 if (work_pending(&adev->gfx.sq_work.work)) {
7058 gfx_v8_0_parse_sq_irq(adev, ih_data);
7060 adev->gfx.sq_work.ih_data = ih_data;
7061 schedule_work(&adev->gfx.sq_work.work);
/*
 * irq .set callback for the KIQ ring: enables/disables the GENERIC2
 * interrupt in CPC_INT_CNTL or the per-pipe CP_MEn_PIPEx_INT_CNTL
 * matching the KIQ ring's me/pipe.  Only GENERIC2 is supported (BUG
 * otherwise).
 */
7067 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
7068 struct amdgpu_irq_src *src,
7070 enum amdgpu_interrupt_state state)
7072 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7075 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
7076 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
7077 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7079 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
7081 GENERIC2_INT_ENABLE,
7082 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7084 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
7086 GENERIC2_INT_ENABLE,
7087 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7090 BUG(); /* kiq only support GENERIC2_INT now */
/*
 * KIQ GENERIC2 interrupt handler: decode me/pipe/queue from ring_id for
 * debugging and process fences on the KIQ ring.
 */
7096 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7097 struct amdgpu_irq_src *source,
7098 struct amdgpu_iv_entry *entry)
7100 u8 me_id, pipe_id, queue_id;
7101 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
/* same ring_id layout as the EOP handler */
7103 me_id = (entry->ring_id & 0x0c) >> 2;
7104 pipe_id = (entry->ring_id & 0x03) >> 0;
7105 queue_id = (entry->ring_id & 0x70) >> 4;
7106 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7107 me_id, pipe_id, queue_id);
7109 amdgpu_fence_process(ring);
/* IP-block lifecycle callbacks for the GFX v8 block (init/fini,
 * suspend/resume, soft reset, clock/power gating). */
7113 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
7115 .early_init = gfx_v8_0_early_init,
7116 .late_init = gfx_v8_0_late_init,
7117 .sw_init = gfx_v8_0_sw_init,
7118 .sw_fini = gfx_v8_0_sw_fini,
7119 .hw_init = gfx_v8_0_hw_init,
7120 .hw_fini = gfx_v8_0_hw_fini,
7121 .suspend = gfx_v8_0_suspend,
7122 .resume = gfx_v8_0_resume,
7123 .is_idle = gfx_v8_0_is_idle,
7124 .wait_for_idle = gfx_v8_0_wait_for_idle,
7125 .check_soft_reset = gfx_v8_0_check_soft_reset,
7126 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
7127 .soft_reset = gfx_v8_0_soft_reset,
7128 .post_soft_reset = gfx_v8_0_post_soft_reset,
7129 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
7130 .set_powergating_state = gfx_v8_0_set_powergating_state,
7131 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
/* Ring callbacks for the GFX ring.  emit_frame_size accounts for the
 * worst-case dword count of all packets one frame may emit. */
7134 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
7135 .type = AMDGPU_RING_TYPE_GFX,
7137 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7138 .support_64bit_ptrs = false,
7139 .get_rptr = gfx_v8_0_ring_get_rptr,
7140 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
7141 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
7142 .emit_frame_size = /* maximum 215dw if count 16 IBs in */
7144 7 + /* PIPELINE_SYNC */
7145 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
7146 8 + /* FENCE for VM_FLUSH */
7147 20 + /* GDS switch */
7148 4 + /* double SWITCH_BUFFER,
7149 the first COND_EXEC jump to the place just
7150 prior to this double SWITCH_BUFFER */
7158 8 + 8 + /* FENCE x2 */
7159 2, /* SWITCH_BUFFER */
7160 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
7161 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
7162 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
7163 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7164 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7165 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7166 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7167 .test_ring = gfx_v8_0_ring_test_ring,
7168 .test_ib = gfx_v8_0_ring_test_ib,
7169 .insert_nop = amdgpu_ring_insert_nop,
7170 .pad_ib = amdgpu_ring_generic_pad_ib,
7171 .emit_switch_buffer = gfx_v8_ring_emit_sb,
7172 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
7173 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
7174 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
7175 .emit_wreg = gfx_v8_0_ring_emit_wreg,
/* Ring callbacks for compute (MEC) rings; fences use RELEASE_MEM and
 * priority changes go through gfx_v8_0_ring_set_priority_compute(). */
7178 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
7179 .type = AMDGPU_RING_TYPE_COMPUTE,
7181 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7182 .support_64bit_ptrs = false,
7183 .get_rptr = gfx_v8_0_ring_get_rptr,
7184 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7185 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7187 20 + /* gfx_v8_0_ring_emit_gds_switch */
7188 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7189 5 + /* hdp_invalidate */
7190 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7191 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
7192 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7193 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7194 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7195 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
7196 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7197 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7198 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7199 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7200 .test_ring = gfx_v8_0_ring_test_ring,
7201 .test_ib = gfx_v8_0_ring_test_ib,
7202 .insert_nop = amdgpu_ring_insert_nop,
7203 .pad_ib = amdgpu_ring_generic_pad_ib,
7204 .set_priority = gfx_v8_0_ring_set_priority_compute,
7205 .emit_wreg = gfx_v8_0_ring_emit_wreg,
/* Ring callbacks for the kernel interface queue (KIQ); adds register
 * read/write emission used for SR-IOV access, KIQ-specific fences. */
7208 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7209 .type = AMDGPU_RING_TYPE_KIQ,
7211 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7212 .support_64bit_ptrs = false,
7213 .get_rptr = gfx_v8_0_ring_get_rptr,
7214 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7215 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7217 20 + /* gfx_v8_0_ring_emit_gds_switch */
7218 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7219 5 + /* hdp_invalidate */
7220 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7221 17 + /* gfx_v8_0_ring_emit_vm_flush */
7222 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7223 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7224 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7225 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7226 .test_ring = gfx_v8_0_ring_test_ring,
7227 .test_ib = gfx_v8_0_ring_test_ib,
7228 .insert_nop = amdgpu_ring_insert_nop,
7229 .pad_ib = amdgpu_ring_generic_pad_ib,
7230 .emit_rreg = gfx_v8_0_ring_emit_rreg,
7231 .emit_wreg = gfx_v8_0_ring_emit_wreg,
/* Attach the ring-function tables to the KIQ, GFX and compute rings. */
7234 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7238 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7240 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7241 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7243 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7244 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
/* EOP interrupt source: per-ring enable + fence processing handler. */
7247 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7248 .set = gfx_v8_0_set_eop_interrupt_state,
7249 .process = gfx_v8_0_eop_irq,
/* Privileged-register fault interrupt source. */
7252 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7253 .set = gfx_v8_0_set_priv_reg_fault_state,
7254 .process = gfx_v8_0_priv_reg_irq,
/* Privileged-instruction fault interrupt source. */
7257 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7258 .set = gfx_v8_0_set_priv_inst_fault_state,
7259 .process = gfx_v8_0_priv_inst_irq,
/* KIQ GENERIC2 interrupt source. */
7262 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7263 .set = gfx_v8_0_kiq_set_interrupt_state,
7264 .process = gfx_v8_0_kiq_irq,
/* CP EDC/ECC error interrupt source. */
7267 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7268 .set = gfx_v8_0_set_cp_ecc_int_state,
7269 .process = gfx_v8_0_cp_ecc_error_irq,
/* SQ interrupt source (general-purpose and wave trap/EDC reports). */
7272 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7273 .set = gfx_v8_0_set_sq_int_state,
7274 .process = gfx_v8_0_sq_irq,
/* Register all GFX v8 interrupt sources with their type counts and
 * function tables. */
7277 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7279 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7280 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7282 adev->gfx.priv_reg_irq.num_types = 1;
7283 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7285 adev->gfx.priv_inst_irq.num_types = 1;
7286 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7288 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7289 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7291 adev->gfx.cp_ecc_error_irq.num_types = 1;
7292 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7294 adev->gfx.sq_irq.num_types = 1;
7295 adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
/* Install the RLC (run-list controller) function table. */
7298 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7300 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
/*
 * Initialize GDS (global data share) sizing: total size is read from
 * GDS_VMID0_SIZE; per-partition (gfx vs. CS) splits differ for the
 * 64 KiB configuration vs. smaller ones.
 */
7303 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7305 /* init asci gds info */
7306 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7307 adev->gds.gws.total_size = 64;
7308 adev->gds.oa.total_size = 16;
7310 if (adev->gds.mem.total_size == 64 * 1024) {
7311 adev->gds.mem.gfx_partition_size = 4096;
7312 adev->gds.mem.cs_partition_size = 4096;
7314 adev->gds.gws.gfx_partition_size = 4;
7315 adev->gds.gws.cs_partition_size = 4;
7317 adev->gds.oa.gfx_partition_size = 4;
7318 adev->gds.oa.cs_partition_size = 1;
7320 adev->gds.mem.gfx_partition_size = 1024;
7321 adev->gds.mem.cs_partition_size = 1024;
7323 adev->gds.gws.gfx_partition_size = 16;
7324 adev->gds.gws.cs_partition_size = 16;
7326 adev->gds.oa.gfx_partition_size = 4;
7327 adev->gds.oa.cs_partition_size = 4;
/*
 * Write a user-requested inactive-CU bitmap into
 * GC_USER_SHADER_ARRAY_CONFIG for the currently selected SE/SH,
 * masked to the field's valid bits.
 */
7331 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7339 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7340 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7342 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
/*
 * Return the active-CU bitmap for the currently selected SE/SH:
 * the complement of the combined hardware + user inactive-CU bits,
 * limited to max_cu_per_sh.
 */
7345 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7349 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7350 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7352 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7354 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
/*
 * Populate adev->gfx.cu_info: walk every SE/SH under grbm_idx_mutex,
 * apply user CU-disable masks, read the active-CU bitmap, count active
 * CUs and build the always-on (AO) CU mask, then fill in the fixed
 * per-CU capability fields (SIMDs, waves, scratch slots, LDS size).
 */
7357 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7359 int i, j, k, counter, active_cu_number = 0;
7360 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7361 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7362 unsigned disable_masks[4 * 2];
7365 memset(cu_info, 0, sizeof(*cu_info));
7367 if (adev->flags & AMD_IS_APU)
7370 ao_cu_num = adev->gfx.config.max_cu_per_sh;
/* parse the amdgpu.disable_cu module parameter into per-SE/SH masks */
7372 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7374 mutex_lock(&adev->grbm_idx_mutex);
7375 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7376 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7380 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7382 gfx_v8_0_set_user_cu_inactive_bitmap(
7383 adev, disable_masks[i * 2 + j]);
7384 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7385 cu_info->bitmap[i][j] = bitmap;
7387 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7388 if (bitmap & mask) {
7389 if (counter < ao_cu_num)
7395 active_cu_number += counter;
7397 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7398 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
/* restore broadcast SE/SH selection */
7401 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7402 mutex_unlock(&adev->grbm_idx_mutex);
7404 cu_info->number = active_cu_number;
7405 cu_info->ao_cu_mask = ao_cu_mask;
7406 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7407 cu_info->max_waves_per_simd = 10;
7408 cu_info->max_scratch_slots_per_cu = 32;
7409 cu_info->wave_front_size = 64;
7410 cu_info->lds_size = 64;
/* GFX v8.0 IP block descriptor exported to the amdgpu IP discovery code. */
7413 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7415 .type = AMD_IP_BLOCK_TYPE_GFX,
7419 .funcs = &gfx_v8_0_ip_funcs,
/* GFX v8.1 IP block descriptor (shares the v8.0 function table). */
7422 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7424 .type = AMD_IP_BLOCK_TYPE_GFX,
7428 .funcs = &gfx_v8_0_ip_funcs,
/*
 * Emit the CE metadata payload (used under SR-IOV) as a WRITE_DATA to
 * the CSA's ce_payload slot.  The payload layout and destination offset
 * depend on whether chained IBs are supported on this VF.
 */
7431 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7433 uint64_t ce_payload_addr;
7436 struct vi_ce_ib_state regular;
7437 struct vi_ce_ib_state_chained_ib chained;
7440 if (ring->adev->virt.chained_ib_support) {
7441 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7442 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
/* cnt = payload dwords + 4 header dwords - 2 (count field excludes first 2) */
7443 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7445 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7446 offsetof(struct vi_gfx_meta_data, ce_payload);
7447 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7450 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7451 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7452 WRITE_DATA_DST_SEL(8) |
7454 WRITE_DATA_CACHE_POLICY(0));
7455 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7456 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7457 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7460 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7462 uint64_t de_payload_addr, gds_addr, csa_addr;
7465 struct vi_de_ib_state regular;
7466 struct vi_de_ib_state_chained_ib chained;
7469 csa_addr = amdgpu_csa_vaddr(ring->adev);
7470 gds_addr = csa_addr + 4096;
7471 if (ring->adev->virt.chained_ib_support) {
7472 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7473 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7474 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7475 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7477 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7478 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7479 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7480 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7483 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7484 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7485 WRITE_DATA_DST_SEL(8) |
7487 WRITE_DATA_CACHE_POLICY(0));
7488 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7489 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7490 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);