/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)			((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)			((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)			((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)		((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)			((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)			((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)			((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)		((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)			((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum bpm_reg {
	BPM_REG_CGLS_EN = 0,		/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,		/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,		/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,		/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,		/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

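/* GDS base/size, GWS and OA register offsets for each of the 16 VMIDs */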
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

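/*
 * Program the per-ASIC "golden" register settings: the MGCG/CGCG clock
 * gating init values followed by the tuning values validated for each
 * chip, selected on adev->asic_type.
 */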
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

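/* Expose the eight SCRATCH_REG* registers used by the ring and IB tests */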
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

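/*
 * Basic ring liveness test: seed a scratch register with 0xCAFEDEAD, ask
 * the CP to write 0xDEADBEEF to it via SET_UCONFIG_REG, then poll until
 * the new value shows up or the timeout expires.
 */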
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

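/*
 * Same idea as the ring test, but the scratch write is submitted through
 * an indirect buffer (IB) and completion is detected with a fence wait,
 * exercising the full IB submission path.
 */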
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

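/* Drop all CP/RLC firmware references taken in gfx_v8_0_init_microcode() */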
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

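/*
 * Fetch and validate the PFP, ME, CE, RLC and MEC firmware images for the
 * current chip, cache their version/feature numbers, and register the
 * blobs with the firmware list when loading goes through the SMU.
 */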
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released on feature version #46
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the JT (jump table) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

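/*
 * Build the clear-state indirect buffer: a PM4 preamble that loads the
 * context registers from the per-ASIC clear state data and finishes with
 * a CLEAR_STATE packet.
 */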
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

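/*
 * Copy the CP jump tables (CE, PFP, ME, MEC and, on Carrizo, MEC2) out of
 * the firmware images into the RLC-owned cp_table buffer, back to back.
 */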
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

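/*
 * Allocate and fill the RLC clear-state buffer, plus the CP jump table /
 * GDS backup buffer required on Carrizo and Stoney.
 */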
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

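/* Allocate one GFX8_MEC_HPD_SIZE HPD/EOP area per acquired compute queue */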
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

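/*
 * Hand-assembled GCN compute shaders used by the Carrizo EDC workaround
 * below: they write to every VGPR/SGPR so the register files are in a
 * known state before EDC error detection is enabled.
 */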
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_GRBM_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};

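/*
 * Carrizo EDC workaround: dispatch the GPR init shaders above to touch
 * every VGPR/SGPR, enable EDC via GB_EDC_MODE/CC_GC_EDC_CONFIG, then read
 * back the SEC/DED counter registers to clear them.
 */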
1487 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1489 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1490 struct amdgpu_ib ib;
1491 struct dma_fence *f = NULL;
1494 unsigned total_size, vgpr_offset, sgpr_offset;
1497 /* only supported on CZ */
1498 if (adev->asic_type != CHIP_CARRIZO)
1501 /* bail if the compute ring is not ready */
1505 tmp = RREG32(mmGB_EDC_MODE);
1506 WREG32(mmGB_EDC_MODE, 0);
1509 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1511 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1513 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1514 total_size = ALIGN(total_size, 256);
1515 vgpr_offset = total_size;
1516 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1517 sgpr_offset = total_size;
1518 total_size += sizeof(sgpr_init_compute_shader);
1520 /* allocate an indirect buffer to put the commands in */
1521 memset(&ib, 0, sizeof(ib));
1522 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1524 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1528 /* load the compute shaders */
1529 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1530 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1532 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1533 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1535 /* init the ib length to 0 */
1539 /* write the register state for the compute dispatch */
1540 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1541 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1542 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1543 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1545 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1546 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1547 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1548 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1549 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1550 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1552 /* write dispatch packet */
1553 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1554 ib.ptr[ib.length_dw++] = 8; /* x */
1555 ib.ptr[ib.length_dw++] = 1; /* y */
1556 ib.ptr[ib.length_dw++] = 1; /* z */
1557 ib.ptr[ib.length_dw++] =
1558 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1560 /* write CS partial flush packet */
1561 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1562 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1565 /* write the register state for the compute dispatch */
1566 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1567 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1568 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1569 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1571 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1572 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1573 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1574 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1575 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1576 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1578 /* write dispatch packet */
1579 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1580 ib.ptr[ib.length_dw++] = 8; /* x */
1581 ib.ptr[ib.length_dw++] = 1; /* y */
1582 ib.ptr[ib.length_dw++] = 1; /* z */
1583 ib.ptr[ib.length_dw++] =
1584 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1586 /* write CS partial flush packet */
1587 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1588 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}
	tmp = REG_SET_FIELD(0, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);
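	/*
	 * With the GPR banks primed, enable error detection and correction:
	 * DED_MODE/PROP_FED control how double-bit errors are reported and
	 * propagated, and clearing DIS_EDC turns EDC back on (the exact
	 * field semantics are hardware-defined; see the gca register
	 * headers).
	 */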
	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
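/*
 * Derive the per-ASIC gfx configuration: shader engine/CU topology, FIFO
 * sizes, the golden GB_ADDR_CONFIG value and the memory row size that is
 * patched into GB_ADDR_CONFIG at the end.
 */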
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}
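	/*
	 * Derive the memory row size from the MC configuration: APUs read
	 * the fused DIMM address maps, while discrete parts compute it from
	 * MC_ARB_RAMCFG.NOOFCOLS below.
	 */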
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;
		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}
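	/*
	 * For dGPUs the row size works out to (4 * 2^(8 + NOOFCOLS)) / 1024
	 * KB: NOOFCOLS = 0 gives 1 KB, NOOFCOLS = 2 gives 4 KB, which is
	 * also the largest value the ROW_SIZE field below can encode.
	 */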
	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			     + (ring_id * GFX8_MEC_HPD_SIZE);
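	/*
	 * Each compute ring owns one doorbell and one GFX8_MEC_HPD_SIZE
	 * slice of the MEC's HPD/EOP buffer, both indexed by ring_id.
	 */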
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		   + ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;
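	/*
	 * Every MEC exposes 4 pipes with 8 queues each, so the ASICs above
	 * provide up to num_mec * 32 hardware compute queues from which the
	 * KIQ and the kernel's compute rings are carved out.
	 */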
	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;
	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}
	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}
	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}
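	/*
	 * The pipe index is the innermost loop above, so consecutive ring
	 * IDs land on different hardware pipes before a pipe's remaining
	 * queues are used; that is the "horizontal" allocation the comment
	 * above refers to.
	 */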
	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;
	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	gfx_v8_0_free_microcode(adev);

	return 0;
}
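/*
 * Program the per-ASIC GB_TILE_MODEn and GB_MACROTILE_MODEn tables that
 * describe how surfaces are tiled in memory. The values are also cached
 * in adev->gfx.config so the rest of the driver can report the same
 * tables later without re-reading the hardware.
 */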
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
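		/*
		 * The macrotile table below complements the tile modes
		 * above: each GB_MACROTILE_MODEn entry packs the bank
		 * width/height, macro tile aspect ratio and bank count for
		 * one macro tile configuration.
		 */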
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
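	/*
	 * Tonga uses the same table layout as Fiji, with the pipe config
	 * dropped from P16_32x32_16x16 to P8_32x32_16x16 to match its 8
	 * tile pipes.
	 */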
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_STONEY:
3034 break;
3035 case CHIP_STONEY:
3036 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3037 PIPE_CONFIG(ADDR_SURF_P2) |
3038 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3039 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3040 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3041 PIPE_CONFIG(ADDR_SURF_P2) |
3042 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3043 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3044 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3045 PIPE_CONFIG(ADDR_SURF_P2) |
3046 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3047 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3048 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3049 PIPE_CONFIG(ADDR_SURF_P2) |
3050 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3051 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3052 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3053 PIPE_CONFIG(ADDR_SURF_P2) |
3054 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3055 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3056 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3057 PIPE_CONFIG(ADDR_SURF_P2) |
3058 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3059 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3060 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3061 PIPE_CONFIG(ADDR_SURF_P2) |
3062 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3063 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3064 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3065 PIPE_CONFIG(ADDR_SURF_P2));
3066 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3067 PIPE_CONFIG(ADDR_SURF_P2) |
3068 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3069 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3070 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3071 PIPE_CONFIG(ADDR_SURF_P2) |
3072 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3073 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3074 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3075 PIPE_CONFIG(ADDR_SURF_P2) |
3076 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3077 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3078 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3079 PIPE_CONFIG(ADDR_SURF_P2) |
3080 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3081 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3082 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3083 PIPE_CONFIG(ADDR_SURF_P2) |
3084 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3085 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3086 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3087 PIPE_CONFIG(ADDR_SURF_P2) |
3088 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3090 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3091 PIPE_CONFIG(ADDR_SURF_P2) |
3092 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3093 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3094 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3095 PIPE_CONFIG(ADDR_SURF_P2) |
3096 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3097 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3098 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3099 PIPE_CONFIG(ADDR_SURF_P2) |
3100 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3101 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3102 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3103 PIPE_CONFIG(ADDR_SURF_P2) |
3104 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3105 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3106 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3107 PIPE_CONFIG(ADDR_SURF_P2) |
3108 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3109 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3110 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3111 PIPE_CONFIG(ADDR_SURF_P2) |
3112 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3113 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3114 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3115 PIPE_CONFIG(ADDR_SURF_P2) |
3116 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3117 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3118 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3119 PIPE_CONFIG(ADDR_SURF_P2) |
3120 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3121 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3122 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3123 PIPE_CONFIG(ADDR_SURF_P2) |
3124 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3125 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3126 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3127 PIPE_CONFIG(ADDR_SURF_P2) |
3128 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3129 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3130 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3131 PIPE_CONFIG(ADDR_SURF_P2) |
3132 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3134 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3135 PIPE_CONFIG(ADDR_SURF_P2) |
3136 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3137 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3139 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3140 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3141 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3142 NUM_BANKS(ADDR_SURF_8_BANK));
3143 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3146 NUM_BANKS(ADDR_SURF_8_BANK));
3147 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150 NUM_BANKS(ADDR_SURF_8_BANK));
3151 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3152 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3153 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3154 NUM_BANKS(ADDR_SURF_8_BANK));
3155 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3158 NUM_BANKS(ADDR_SURF_8_BANK));
3159 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3162 NUM_BANKS(ADDR_SURF_8_BANK));
3163 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3164 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3165 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3166 NUM_BANKS(ADDR_SURF_8_BANK));
3167 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3170 NUM_BANKS(ADDR_SURF_16_BANK));
3171 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3172 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3173 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3174 NUM_BANKS(ADDR_SURF_16_BANK));
3175 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3178 NUM_BANKS(ADDR_SURF_16_BANK));
3179 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3182 NUM_BANKS(ADDR_SURF_16_BANK));
3183 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3185 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3186 NUM_BANKS(ADDR_SURF_16_BANK));
3187 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3188 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3189 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3190 NUM_BANKS(ADDR_SURF_16_BANK));
3191 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3194 NUM_BANKS(ADDR_SURF_8_BANK));
3196 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3197 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3198 reg_offset != 23)
3199 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3201 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3202 if (reg_offset != 7)
3203 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3205 break;
3206 default:
3207 dev_warn(adev->dev,
3208 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3209 adev->asic_type);
3211 case CHIP_CARRIZO:
3212 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3213 PIPE_CONFIG(ADDR_SURF_P2) |
3214 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3215 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3216 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3217 PIPE_CONFIG(ADDR_SURF_P2) |
3218 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3219 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3220 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3221 PIPE_CONFIG(ADDR_SURF_P2) |
3222 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3223 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3224 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3225 PIPE_CONFIG(ADDR_SURF_P2) |
3226 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3227 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3228 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3229 PIPE_CONFIG(ADDR_SURF_P2) |
3230 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3231 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3232 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3233 PIPE_CONFIG(ADDR_SURF_P2) |
3234 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3236 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3237 PIPE_CONFIG(ADDR_SURF_P2) |
3238 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3239 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3240 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3241 PIPE_CONFIG(ADDR_SURF_P2));
3242 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3243 PIPE_CONFIG(ADDR_SURF_P2) |
3244 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3246 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3247 PIPE_CONFIG(ADDR_SURF_P2) |
3248 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3249 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3250 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3251 PIPE_CONFIG(ADDR_SURF_P2) |
3252 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3254 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3255 PIPE_CONFIG(ADDR_SURF_P2) |
3256 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3257 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259 PIPE_CONFIG(ADDR_SURF_P2) |
3260 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3262 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3263 PIPE_CONFIG(ADDR_SURF_P2) |
3264 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3266 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3267 PIPE_CONFIG(ADDR_SURF_P2) |
3268 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3270 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3271 PIPE_CONFIG(ADDR_SURF_P2) |
3272 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3274 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3275 PIPE_CONFIG(ADDR_SURF_P2) |
3276 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3278 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3279 PIPE_CONFIG(ADDR_SURF_P2) |
3280 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3282 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3283 PIPE_CONFIG(ADDR_SURF_P2) |
3284 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3286 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3287 PIPE_CONFIG(ADDR_SURF_P2) |
3288 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3290 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3291 PIPE_CONFIG(ADDR_SURF_P2) |
3292 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3294 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3295 PIPE_CONFIG(ADDR_SURF_P2) |
3296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3298 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3299 PIPE_CONFIG(ADDR_SURF_P2) |
3300 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3302 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3303 PIPE_CONFIG(ADDR_SURF_P2) |
3304 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3306 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3307 PIPE_CONFIG(ADDR_SURF_P2) |
3308 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3310 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3311 PIPE_CONFIG(ADDR_SURF_P2) |
3312 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3315 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3316 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3317 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3318 NUM_BANKS(ADDR_SURF_8_BANK));
3319 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3320 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3321 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3322 NUM_BANKS(ADDR_SURF_8_BANK));
3323 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3325 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3326 NUM_BANKS(ADDR_SURF_8_BANK));
3327 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3328 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3329 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3330 NUM_BANKS(ADDR_SURF_8_BANK));
3331 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3332 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3333 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3334 NUM_BANKS(ADDR_SURF_8_BANK));
3335 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3336 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3337 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3338 NUM_BANKS(ADDR_SURF_8_BANK));
3339 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3342 NUM_BANKS(ADDR_SURF_8_BANK));
3343 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3346 NUM_BANKS(ADDR_SURF_16_BANK));
3347 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3350 NUM_BANKS(ADDR_SURF_16_BANK));
3351 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3354 NUM_BANKS(ADDR_SURF_16_BANK));
3355 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3358 NUM_BANKS(ADDR_SURF_16_BANK));
3359 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3362 NUM_BANKS(ADDR_SURF_16_BANK));
3363 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3366 NUM_BANKS(ADDR_SURF_16_BANK));
3367 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3370 NUM_BANKS(ADDR_SURF_8_BANK));
3372 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3373 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3374 reg_offset != 23)
3375 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3377 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3378 if (reg_offset != 7)
3379 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
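/*
 * gfx_v8_0_select_se_sh() below steers subsequent GRBM register accesses:
 * passing 0xffffffff for se_num, sh_num or instance sets the corresponding
 * *_BROADCAST_WRITES bit in GRBM_GFX_INDEX so the access reaches every
 * instance instead of a single SE/SH/CU.
 */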
3385 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3386 u32 se_num, u32 sh_num, u32 instance)
3390 if (instance == 0xffffffff)
3391 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3392 else
3393 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3395 if (se_num == 0xffffffff)
3396 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3397 else
3398 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3400 if (sh_num == 0xffffffff)
3401 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3402 else
3403 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3405 WREG32(mmGRBM_GFX_INDEX, data);
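/*
 * Derive the bitmap of active render backends for the currently selected
 * SE/SH: the disable bits from CC_RB_BACKEND_DISABLE (harvest fuses) and
 * GC_USER_RB_BACKEND_DISABLE are OR'd together and inverted under a mask
 * sized to max_backends_per_se / max_sh_per_se.
 */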
3408 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3412 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3413 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3415 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3417 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3418 adev->gfx.config.max_sh_per_se);
3420 return (~data) & mask;
3423 static void
3424 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3426 switch (adev->asic_type) {
3427 case CHIP_FIJI:
3428 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3429 RB_XSEL2(1) | PKR_MAP(2) |
3430 PKR_XSEL(1) | PKR_YSEL(1) |
3431 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3432 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3433 SE_PAIR_YSEL(2);
3434 break;
3435 case CHIP_TONGA:
3436 case CHIP_POLARIS10:
3437 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3438 SE_XSEL(1) | SE_YSEL(1);
3439 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3440 SE_PAIR_YSEL(2);
3441 break;
3442 case CHIP_TOPAZ:
3443 case CHIP_CARRIZO:
3444 *rconf |= RB_MAP_PKR0(2);
3445 *rconf1 |= 0x0;
3446 break;
3447 case CHIP_POLARIS11:
3448 case CHIP_POLARIS12:
3449 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3450 SE_XSEL(1) | SE_YSEL(1);
3451 *rconf1 |= 0x0;
3452 break;
3453 case CHIP_STONEY:
3454 *rconf |= 0x0;
3455 *rconf1 |= 0x0;
3456 break;
3457 default:
3458 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3459 break;
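/*
 * On harvested parts some RBs are fused off, so the golden raster config
 * would point packers at dead backends. The helper below rewrites the
 * SE_MAP/PKR_MAP/RB_MAP fields per shader engine so that only backends
 * present in rb_mask are referenced, then restores broadcast mode.
 */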
3463 static void
3464 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3465 u32 raster_config, u32 raster_config_1,
3466 unsigned rb_mask, unsigned num_rb)
3468 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3469 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3470 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3471 unsigned rb_per_se = num_rb / num_se;
3472 unsigned se_mask[4];
3475 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3476 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3477 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3478 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3480 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3481 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3482 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3484 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3485 (!se_mask[2] && !se_mask[3]))) {
3486 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3488 if (!se_mask[0] && !se_mask[1]) {
3489 raster_config_1 |=
3490 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3491 } else {
3492 raster_config_1 |=
3493 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3494 }
3497 for (se = 0; se < num_se; se++) {
3498 unsigned raster_config_se = raster_config;
3499 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3500 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3501 int idx = (se / 2) * 2;
3503 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3504 raster_config_se &= ~SE_MAP_MASK;
3506 if (!se_mask[idx]) {
3507 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3508 } else {
3509 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3510 }
3513 pkr0_mask &= rb_mask;
3514 pkr1_mask &= rb_mask;
3515 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3516 raster_config_se &= ~PKR_MAP_MASK;
3518 if (!pkr0_mask) {
3519 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3520 } else {
3521 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3522 }
3525 if (rb_per_se >= 2) {
3526 unsigned rb0_mask = 1 << (se * rb_per_se);
3527 unsigned rb1_mask = rb0_mask << 1;
3529 rb0_mask &= rb_mask;
3530 rb1_mask &= rb_mask;
3531 if (!rb0_mask || !rb1_mask) {
3532 raster_config_se &= ~RB_MAP_PKR0_MASK;
3534 if (!rb0_mask) {
3535 raster_config_se |=
3536 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3537 } else {
3538 raster_config_se |=
3539 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3540 }
3543 if (rb_per_se > 2) {
3544 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3545 rb1_mask = rb0_mask << 1;
3546 rb0_mask &= rb_mask;
3547 rb1_mask &= rb_mask;
3548 if (!rb0_mask || !rb1_mask) {
3549 raster_config_se &= ~RB_MAP_PKR1_MASK;
3551 if (!rb0_mask) {
3552 raster_config_se |=
3553 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3554 } else {
3555 raster_config_se |=
3556 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3557 }
3562 /* GRBM_GFX_INDEX has a different offset on VI */
3563 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3564 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3565 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3568 /* GRBM_GFX_INDEX has a different offset on VI */
3569 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
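/*
 * Walk every SE/SH, collect the active-RB bitmap and cache it for
 * userspace, then program PA_SC_RASTER_CONFIG(_1): fully enabled parts
 * take the golden value, harvested parts get per-SE configs via the
 * helper above.
 */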
3572 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3576 u32 raster_config = 0, raster_config_1 = 0;
3578 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3579 adev->gfx.config.max_sh_per_se;
3580 unsigned num_rb_pipes;
3582 mutex_lock(&adev->grbm_idx_mutex);
3583 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3584 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3585 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3586 data = gfx_v8_0_get_rb_active_bitmap(adev);
3587 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3588 rb_bitmap_width_per_sh);
3591 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3593 adev->gfx.config.backend_enable_mask = active_rbs;
3594 adev->gfx.config.num_rbs = hweight32(active_rbs);
3596 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3597 adev->gfx.config.max_shader_engines, 16);
3599 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3601 if (!adev->gfx.config.backend_enable_mask ||
3602 adev->gfx.config.num_rbs >= num_rb_pipes) {
3603 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3604 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3605 } else {
3606 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3607 adev->gfx.config.backend_enable_mask,
3608 adev->gfx.config.num_rbs);
3609 }
3611 /* cache the values for userspace */
3612 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3613 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3614 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3615 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3616 RREG32(mmCC_RB_BACKEND_DISABLE);
3617 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3618 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3619 adev->gfx.config.rb_config[i][j].raster_config =
3620 RREG32(mmPA_SC_RASTER_CONFIG);
3621 adev->gfx.config.rb_config[i][j].raster_config_1 =
3622 RREG32(mmPA_SC_RASTER_CONFIG_1);
3625 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3626 mutex_unlock(&adev->grbm_idx_mutex);
3630 * gfx_v8_0_init_compute_vmid - init the compute VMID SH_MEM apertures
3632 * @adev: amdgpu_device pointer
3634 * Initialize compute vmid sh_mem registers
3637 #define DEFAULT_SH_MEM_BASES (0x6000)
3638 #define FIRST_COMPUTE_VMID (8)
3639 #define LAST_COMPUTE_VMID (16)
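/* VMIDs FIRST_COMPUTE_VMID..LAST_COMPUTE_VMID-1 (8..15) are reserved for
 * compute; each of them gets the same fixed LDS/scratch/GPUVM aperture
 * layout below via SH_MEM_BASES/SH_MEM_CONFIG.
 */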
3640 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3643 uint32_t sh_mem_config;
3644 uint32_t sh_mem_bases;
3646 /*
3647 * Configure apertures:
3648 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3649 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3650 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3651 */
3652 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3654 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3655 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3656 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3657 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3658 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3659 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3661 mutex_lock(&adev->srbm_mutex);
3662 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3663 vi_srbm_select(adev, 0, 0, 0, i);
3664 /* CP and shaders */
3665 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3666 WREG32(mmSH_MEM_APE1_BASE, 1);
3667 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3668 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3670 vi_srbm_select(adev, 0, 0, 0, 0);
3671 mutex_unlock(&adev->srbm_mutex);
3674 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3676 switch (adev->asic_type) {
3677 default:
3678 adev->gfx.config.double_offchip_lds_buf = 1;
3679 break;
3680 case CHIP_CARRIZO:
3681 case CHIP_STONEY:
3682 adev->gfx.config.double_offchip_lds_buf = 0;
3683 break;
3687 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3689 u32 tmp, sh_static_mem_cfg;
3692 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3693 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3694 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3695 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3697 gfx_v8_0_tiling_mode_table_init(adev);
3698 gfx_v8_0_setup_rb(adev);
3699 gfx_v8_0_get_cu_info(adev);
3700 gfx_v8_0_config_init(adev);
3702 /* XXX SH_MEM regs */
3703 /* where to put LDS, scratch, GPUVM in FSA64 space */
3704 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3705 SWIZZLE_ENABLE, 1);
3706 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3707 ELEMENT_SIZE, 1);
3708 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3709 INDEX_STRIDE, 3);
3710 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3712 mutex_lock(&adev->srbm_mutex);
3713 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3714 vi_srbm_select(adev, 0, 0, 0, i);
3715 /* CP and shaders */
3716 if (i == 0) {
3717 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3718 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3719 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3720 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3721 WREG32(mmSH_MEM_CONFIG, tmp);
3722 WREG32(mmSH_MEM_BASES, 0);
3723 } else {
3724 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3725 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3726 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3727 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3728 WREG32(mmSH_MEM_CONFIG, tmp);
3729 tmp = adev->mc.shared_aperture_start >> 48;
3730 WREG32(mmSH_MEM_BASES, tmp);
3731 }
3733 WREG32(mmSH_MEM_APE1_BASE, 1);
3734 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3736 vi_srbm_select(adev, 0, 0, 0, 0);
3737 mutex_unlock(&adev->srbm_mutex);
3739 gfx_v8_0_init_compute_vmid(adev);
3741 mutex_lock(&adev->grbm_idx_mutex);
3742 /*
3743 * making sure that the following register writes will be broadcast
3744 * to all the shaders
3745 */
3746 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3748 WREG32(mmPA_SC_FIFO_SIZE,
3749 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3750 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3751 (adev->gfx.config.sc_prim_fifo_size_backend <<
3752 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3753 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3754 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3755 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3756 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3758 tmp = RREG32(mmSPI_ARB_PRIORITY);
3759 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3760 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3761 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3762 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3763 WREG32(mmSPI_ARB_PRIORITY, tmp);
3765 mutex_unlock(&adev->grbm_idx_mutex);
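/*
 * Spin until the RLC serdes report idle: first the per-CU masters on every
 * SE/SH, then the non-CU masters (SE/GC/TC0/TC1), each poll bounded by the
 * device's usec_timeout.
 */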
3769 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3774 mutex_lock(&adev->grbm_idx_mutex);
3775 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3776 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3777 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3778 for (k = 0; k < adev->usec_timeout; k++) {
3779 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3780 break;
3781 udelay(1);
3785 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3786 mutex_unlock(&adev->grbm_idx_mutex);
3788 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3789 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3790 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3791 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3792 for (k = 0; k < adev->usec_timeout; k++) {
3793 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3794 break;
3795 udelay(1);
3799 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3800 bool enable)
3802 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3804 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3805 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3806 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3807 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3809 WREG32(mmCP_INT_CNTL_RING0, tmp);
3812 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3815 WREG32(mmRLC_CSIB_ADDR_HI,
3816 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3817 WREG32(mmRLC_CSIB_ADDR_LO,
3818 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3819 WREG32(mmRLC_CSIB_LENGTH,
3820 adev->gfx.rlc.clear_state_size);
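/*
 * The RLC save/restore list firmware blob is a sequence of indirect blocks
 * terminated by 0xFFFFFFFF markers. The parser below records where each
 * block starts, gathers the unique index registers referenced, and rewrites
 * each reference in place to the slot it occupies in unique_indices[].
 */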
3823 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3824 int ind_offset,
3825 int list_size,
3826 int *unique_indices,
3827 int *indices_count,
3828 int max_indices,
3829 int *ind_start_offsets,
3830 int *offset_count,
3831 int max_offset)
3834 bool new_entry = true;
3836 for (; ind_offset < list_size; ind_offset++) {
3838 if (new_entry) {
3839 new_entry = false;
3840 ind_start_offsets[*offset_count] = ind_offset;
3841 *offset_count = *offset_count + 1;
3842 BUG_ON(*offset_count >= max_offset);
3843 }
3845 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3846 new_entry = true;
3847 continue;
3848 }
3850 ind_offset += 2;
3852 /* look for the matching index */
3853 for (indices = 0;
3854 indices < *indices_count;
3855 indices++) {
3856 if (unique_indices[indices] ==
3857 register_list_format[ind_offset])
3858 break;
3859 }
3861 if (indices >= *indices_count) {
3862 unique_indices[*indices_count] =
3863 register_list_format[ind_offset];
3864 indices = *indices_count;
3865 *indices_count = *indices_count + 1;
3866 BUG_ON(*indices_count >= max_indices);
3869 register_list_format[ind_offset] = indices;
3873 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3876 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3877 int indices_count = 0;
3878 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3879 int offset_count = 0;
3882 unsigned int *register_list_format =
3883 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3884 if (!register_list_format)
3885 return -ENOMEM;
3886 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3887 adev->gfx.rlc.reg_list_format_size_bytes);
3889 gfx_v8_0_parse_ind_reg_list(register_list_format,
3890 RLC_FormatDirectRegListLength,
3891 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3892 unique_indices,
3893 &indices_count,
3894 sizeof(unique_indices) / sizeof(int),
3895 indirect_start_offsets,
3896 &offset_count,
3897 sizeof(indirect_start_offsets)/sizeof(int));
3899 /* save and restore list */
3900 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3902 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3903 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3904 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3906 /* indirect list */
3907 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3908 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3909 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3911 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3912 list_size = list_size >> 1;
3913 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3914 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3916 /* starting offsets */
3917 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3918 adev->gfx.rlc.starting_offsets_start);
3919 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3920 WREG32(mmRLC_GPM_SCRATCH_DATA,
3921 indirect_start_offsets[i]);
3923 /* unique indices */
3924 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3925 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3926 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3927 if (unique_indices[i] != 0) {
3928 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3929 WREG32(data + i, unique_indices[i] >> 20);
3932 kfree(register_list_format);
3934 return 0;
3937 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3939 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3942 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3946 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3948 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3949 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3950 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3951 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3952 WREG32(mmRLC_PG_DELAY, data);
3954 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3955 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3959 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3960 bool enable)
3962 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3965 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3966 bool enable)
3968 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3971 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3973 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
3976 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3978 if ((adev->asic_type == CHIP_CARRIZO) ||
3979 (adev->asic_type == CHIP_STONEY)) {
3980 gfx_v8_0_init_csb(adev);
3981 gfx_v8_0_init_save_restore_list(adev);
3982 gfx_v8_0_enable_save_restore_machine(adev);
3983 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3984 gfx_v8_0_init_power_gating(adev);
3985 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3986 } else if ((adev->asic_type == CHIP_POLARIS11) ||
3987 (adev->asic_type == CHIP_POLARIS12)) {
3988 gfx_v8_0_init_csb(adev);
3989 gfx_v8_0_init_save_restore_list(adev);
3990 gfx_v8_0_enable_save_restore_machine(adev);
3991 gfx_v8_0_init_power_gating(adev);
3996 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3998 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4000 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4001 gfx_v8_0_wait_for_rlc_serdes(adev);
4004 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4006 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4007 udelay(50);
4009 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4010 udelay(50);
4013 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4015 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4017 /* APUs such as Carrizo enable the CP interrupt only after the CP is initialized */
4018 if (!(adev->flags & AMD_IS_APU))
4019 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4024 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4026 const struct rlc_firmware_header_v2_0 *hdr;
4027 const __le32 *fw_data;
4028 unsigned i, fw_size;
4030 if (!adev->gfx.rlc_fw)
4031 return -EINVAL;
4033 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4034 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4036 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4037 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4038 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4040 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4041 for (i = 0; i < fw_size; i++)
4042 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4043 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4045 return 0;
4048 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4053 gfx_v8_0_rlc_stop(adev);
4055 /* disable CG */
4056 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4057 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4058 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4059 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4060 if (adev->asic_type == CHIP_POLARIS11 ||
4061 adev->asic_type == CHIP_POLARIS10 ||
4062 adev->asic_type == CHIP_POLARIS12) {
4063 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4064 tmp &= ~0x3;
4065 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4068 /* disable PG */
4069 WREG32(mmRLC_PG_CNTL, 0);
4071 gfx_v8_0_rlc_reset(adev);
4072 gfx_v8_0_init_pg(adev);
4074 if (!adev->pp_enabled) {
4075 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4076 /* legacy rlc firmware loading */
4077 r = gfx_v8_0_rlc_load_microcode(adev);
4078 if (r)
4079 return r;
4080 } else {
4081 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4082 AMDGPU_UCODE_ID_RLC_G);
4083 if (r)
4084 return -EINVAL;
4088 gfx_v8_0_rlc_start(adev);
4090 return 0;
4093 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4096 u32 tmp = RREG32(mmCP_ME_CNTL);
4098 if (enable) {
4099 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4100 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4101 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4102 } else {
4103 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4104 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4105 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4106 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4107 adev->gfx.gfx_ring[i].ready = false;
4108 }
4109 WREG32(mmCP_ME_CNTL, tmp);
4113 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4115 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4116 const struct gfx_firmware_header_v1_0 *ce_hdr;
4117 const struct gfx_firmware_header_v1_0 *me_hdr;
4118 const __le32 *fw_data;
4119 unsigned i, fw_size;
4121 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4122 return -EINVAL;
4124 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4125 adev->gfx.pfp_fw->data;
4126 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4127 adev->gfx.ce_fw->data;
4128 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4129 adev->gfx.me_fw->data;
4131 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4132 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4133 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4135 gfx_v8_0_cp_gfx_enable(adev, false);
4137 /* PFP */
4138 fw_data = (const __le32 *)
4139 (adev->gfx.pfp_fw->data +
4140 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4141 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4142 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4143 for (i = 0; i < fw_size; i++)
4144 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4145 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4147 /* CE */
4148 fw_data = (const __le32 *)
4149 (adev->gfx.ce_fw->data +
4150 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4151 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4152 WREG32(mmCP_CE_UCODE_ADDR, 0);
4153 for (i = 0; i < fw_size; i++)
4154 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4155 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4157 /* ME */
4158 fw_data = (const __le32 *)
4159 (adev->gfx.me_fw->data +
4160 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4161 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4162 WREG32(mmCP_ME_RAM_WADDR, 0);
4163 for (i = 0; i < fw_size; i++)
4164 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4165 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4167 return 0;
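/*
 * Size, in dwords, of the clear-state buffer that gfx_v8_0_cp_gfx_start()
 * emits: preamble + context control + every SECT_CONTEXT extent (two
 * header dwords plus its registers) + the raster config pair + the
 * closing packets.
 */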
4170 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4172 u32 count = 0;
4173 const struct cs_section_def *sect = NULL;
4174 const struct cs_extent_def *ext = NULL;
4176 /* begin clear state */
4177 count += 2;
4178 /* context control state */
4179 count += 3;
4181 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4182 for (ext = sect->section; ext->extent != NULL; ++ext) {
4183 if (sect->id == SECT_CONTEXT)
4184 count += 2 + ext->reg_count;
4185 else
4186 return 0;
4187 }
4188 }
4189 /* pa_sc_raster_config/pa_sc_raster_config1 */
4190 count += 4;
4191 /* end clear state */
4192 count += 2;
4193 /* clear state */
4194 count += 2;
4196 return count;
4199 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4201 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4202 const struct cs_section_def *sect = NULL;
4203 const struct cs_extent_def *ext = NULL;
4207 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4208 WREG32(mmCP_ENDIAN_SWAP, 0);
4209 WREG32(mmCP_DEVICE_ID, 1);
4211 gfx_v8_0_cp_gfx_enable(adev, true);
4213 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4214 if (r) {
4215 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4216 return r;
4217 }
4219 /* clear state buffer */
4220 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4221 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4223 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4224 amdgpu_ring_write(ring, 0x80000000);
4225 amdgpu_ring_write(ring, 0x80000000);
4227 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4228 for (ext = sect->section; ext->extent != NULL; ++ext) {
4229 if (sect->id == SECT_CONTEXT) {
4230 amdgpu_ring_write(ring,
4231 PACKET3(PACKET3_SET_CONTEXT_REG,
4232 ext->reg_count));
4233 amdgpu_ring_write(ring,
4234 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4235 for (i = 0; i < ext->reg_count; i++)
4236 amdgpu_ring_write(ring, ext->extent[i]);
4241 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4242 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4243 switch (adev->asic_type) {
4244 case CHIP_TONGA:
4245 case CHIP_POLARIS10:
4246 amdgpu_ring_write(ring, 0x16000012);
4247 amdgpu_ring_write(ring, 0x0000002A);
4248 break;
4249 case CHIP_POLARIS11:
4250 case CHIP_POLARIS12:
4251 amdgpu_ring_write(ring, 0x16000012);
4252 amdgpu_ring_write(ring, 0x00000000);
4253 break;
4254 case CHIP_FIJI:
4255 amdgpu_ring_write(ring, 0x3a00161a);
4256 amdgpu_ring_write(ring, 0x0000002e);
4257 break;
4258 case CHIP_CARRIZO:
4259 amdgpu_ring_write(ring, 0x00000002);
4260 amdgpu_ring_write(ring, 0x00000000);
4261 break;
4262 case CHIP_TOPAZ:
4263 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4264 0x00000000 : 0x00000002);
4265 amdgpu_ring_write(ring, 0x00000000);
4266 break;
4267 case CHIP_STONEY:
4268 amdgpu_ring_write(ring, 0x00000000);
4269 amdgpu_ring_write(ring, 0x00000000);
4270 break;
4271 default:
4272 BUG();
4275 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4276 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4278 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4279 amdgpu_ring_write(ring, 0);
4281 /* init the CE partitions */
4282 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4283 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4284 amdgpu_ring_write(ring, 0x8000);
4285 amdgpu_ring_write(ring, 0x8000);
4287 amdgpu_ring_commit(ring);
4289 return 0;
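/*
 * Route the gfx ring doorbell: program its offset and enable bit in
 * CP_RB_DOORBELL_CONTROL, and on dGPUs also set the doorbell range
 * registers; Topaz has no gfx doorbells and APUs skip the range setup.
 */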
4291 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4294 /* no gfx doorbells on iceland */
4295 if (adev->asic_type == CHIP_TOPAZ)
4296 return;
4298 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4300 if (ring->use_doorbell) {
4301 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4302 DOORBELL_OFFSET, ring->doorbell_index);
4303 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4304 DOORBELL_HIT, 0);
4305 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4306 DOORBELL_EN, 1);
4307 } else {
4308 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4311 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4313 if (adev->flags & AMD_IS_APU)
4314 return;
4316 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4317 DOORBELL_RANGE_LOWER,
4318 AMDGPU_DOORBELL_GFX_RING0);
4319 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4321 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4322 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
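/*
 * Bring up the gfx ring: program the ring size and block size, reset the
 * read/write pointers, point the writeback addresses at the wb buffer,
 * set the ring base, hook up the doorbell, then emit the clear-state
 * preamble and run a ring test.
 */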
4325 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4327 struct amdgpu_ring *ring;
4330 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4333 /* Set the write pointer delay */
4334 WREG32(mmCP_RB_WPTR_DELAY, 0);
4336 /* set the RB to use vmid 0 */
4337 WREG32(mmCP_RB_VMID, 0);
4339 /* Set ring buffer size */
4340 ring = &adev->gfx.gfx_ring[0];
4341 rb_bufsz = order_base_2(ring->ring_size / 8);
4342 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4343 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4344 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4345 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4346 #ifdef __BIG_ENDIAN
4347 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4348 #endif
4349 WREG32(mmCP_RB0_CNTL, tmp);
4351 /* Initialize the ring buffer's read and write pointers */
4352 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4353 ring->wptr = 0;
4354 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4356 /* set the wb address whether it's enabled or not */
4357 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4358 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4359 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4361 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4362 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4363 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4364 mdelay(1);
4365 WREG32(mmCP_RB0_CNTL, tmp);
4367 rb_addr = ring->gpu_addr >> 8;
4368 WREG32(mmCP_RB0_BASE, rb_addr);
4369 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4371 gfx_v8_0_set_cpg_door_bell(adev, ring);
4372 /* start the ring */
4373 amdgpu_ring_clear_ring(ring);
4374 gfx_v8_0_cp_gfx_start(adev);
4375 ring->ready = true;
4376 r = amdgpu_ring_test_ring(ring);
4377 if (r)
4378 ring->ready = false;
4380 return r;
4383 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4387 if (enable) {
4388 WREG32(mmCP_MEC_CNTL, 0);
4389 } else {
4390 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4391 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4392 adev->gfx.compute_ring[i].ready = false;
4393 adev->gfx.kiq.ring.ready = false;
4394 }
4395 udelay(50);
4398 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4400 const struct gfx_firmware_header_v1_0 *mec_hdr;
4401 const __le32 *fw_data;
4402 unsigned i, fw_size;
4404 if (!adev->gfx.mec_fw)
4405 return -EINVAL;
4407 gfx_v8_0_cp_compute_enable(adev, false);
4409 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4410 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4412 fw_data = (const __le32 *)
4413 (adev->gfx.mec_fw->data +
4414 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4415 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4417 /* MEC1 */
4418 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4419 for (i = 0; i < fw_size; i++)
4420 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4421 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4423 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4424 if (adev->gfx.mec2_fw) {
4425 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4427 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4428 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4430 fw_data = (const __le32 *)
4431 (adev->gfx.mec2_fw->data +
4432 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4433 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4435 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4436 for (i = 0; i < fw_size; i++)
4437 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4438 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4439 }
4441 return 0;
4445 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4448 struct amdgpu_device *adev = ring->adev;
4450 /* tell RLC which queue is the KIQ */
4451 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4452 tmp &= 0xffffff00;
4453 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4454 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4455 tmp |= 0x80;
4456 WREG32(mmRLC_CP_SCHEDULERS, tmp);
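/*
 * Hand the compute queues to the KIQ: one SET_RESOURCES packet publishes
 * the queue bitmap, then a MAP_QUEUES packet per compute ring registers
 * its MQD and wptr writeback addresses. A scratch-register write appended
 * to the same submission signals completion.
 */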
4459 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4461 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4462 uint32_t scratch, tmp = 0;
4463 uint64_t queue_mask = 0;
4466 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4467 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4468 continue;
4470 /* This situation may be hit in the future if a new HW
4471 * generation exposes more than 64 queues. If so, the
4472 * definition of queue_mask needs updating */
4473 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4474 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4475 break;
4476 }
4478 queue_mask |= (1ull << i);
4481 r = amdgpu_gfx_scratch_get(adev, &scratch);
4482 if (r) {
4483 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4484 return r;
4485 }
4486 WREG32(scratch, 0xCAFEDEAD);
4488 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4489 if (r) {
4490 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4491 amdgpu_gfx_scratch_free(adev, scratch);
4492 return r;
4493 }
4494 /* set resources */
4495 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4496 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4497 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4498 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4499 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4500 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4501 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4502 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4503 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4504 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4505 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4506 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4508 /* map queues */
4509 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4510 /* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
4511 amdgpu_ring_write(kiq_ring,
4512 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4513 amdgpu_ring_write(kiq_ring,
4514 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4515 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4516 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4517 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4518 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4519 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4520 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4521 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4523 /* write to scratch for completion */
4524 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4525 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4526 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4527 amdgpu_ring_commit(kiq_ring);
4529 for (i = 0; i < adev->usec_timeout; i++) {
4530 tmp = RREG32(scratch);
4531 if (tmp == 0xDEADBEEF)
4532 break;
4533 DRM_UDELAY(1);
4534 }
4535 if (i >= adev->usec_timeout) {
4536 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4537 scratch, tmp);
4538 r = -EINVAL;
4539 }
4540 amdgpu_gfx_scratch_free(adev, scratch);
4542 return r;
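/*
 * Ask the currently selected HQD to dequeue (req chooses the CP
 * dequeue-request type) and poll until the queue goes inactive before
 * clearing its ring pointers; times out after usec_timeout.
 */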
4545 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4549 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4550 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4551 for (i = 0; i < adev->usec_timeout; i++) {
4552 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4553 break;
4554 udelay(1);
4555 }
4556 if (i == adev->usec_timeout)
4557 r = -ETIMEDOUT;
4558 }
4559 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4560 WREG32(mmCP_HQD_PQ_RPTR, 0);
4561 WREG32(mmCP_HQD_PQ_WPTR, 0);
4563 return r;
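/*
 * Fill the memory queue descriptor (MQD) with the values the HQD
 * registers should take for this ring: EOP buffer, doorbell control,
 * queue base/size and writeback addresses. The MQD is later written to
 * the hardware queue slot by gfx_v8_0_mqd_commit().
 */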
4566 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4568 struct amdgpu_device *adev = ring->adev;
4569 struct vi_mqd *mqd = ring->mqd_ptr;
4570 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4573 mqd->header = 0xC0310800;
4574 mqd->compute_pipelinestat_enable = 0x00000001;
4575 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4576 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4577 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4578 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4579 mqd->compute_misc_reserved = 0x00000003;
4580 if (!(adev->flags & AMD_IS_APU)) {
4581 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4582 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4583 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4584 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4585 }
4586 eop_base_addr = ring->eop_gpu_addr >> 8;
4587 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4588 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4590 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4591 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4592 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4593 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4595 mqd->cp_hqd_eop_control = tmp;
4597 /* enable doorbell? */
4598 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4599 CP_HQD_PQ_DOORBELL_CONTROL,
4600 DOORBELL_EN,
4601 ring->use_doorbell ? 1 : 0);
4603 mqd->cp_hqd_pq_doorbell_control = tmp;
4605 /* set the pointer to the MQD */
4606 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4607 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4609 /* set MQD vmid to 0 */
4610 tmp = RREG32(mmCP_MQD_CONTROL);
4611 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4612 mqd->cp_mqd_control = tmp;
4614 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4615 hqd_gpu_addr = ring->gpu_addr >> 8;
4616 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4617 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4619 /* set up the HQD, this is similar to CP_RB0_CNTL */
4620 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4621 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4622 (order_base_2(ring->ring_size / 4) - 1));
4623 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4624 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4625 #ifdef __BIG_ENDIAN
4626 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4627 #endif
4628 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4629 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4630 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4631 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4632 mqd->cp_hqd_pq_control = tmp;
4634 /* set the wb address whether it's enabled or not */
4635 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4636 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4637 mqd->cp_hqd_pq_rptr_report_addr_hi =
4638 upper_32_bits(wb_gpu_addr) & 0xffff;
4640 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4641 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4642 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4643 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4646 /* enable the doorbell if requested */
4647 if (ring->use_doorbell) {
4648 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4649 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4650 DOORBELL_OFFSET, ring->doorbell_index);
4652 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4653 DOORBELL_EN, 1);
4654 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4655 DOORBELL_SOURCE, 0);
4656 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4657 DOORBELL_HIT, 0);
4658 }
4660 mqd->cp_hqd_pq_doorbell_control = tmp;
4662 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4663 ring->wptr = 0;
4664 mqd->cp_hqd_pq_wptr = ring->wptr;
4665 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4667 /* set the vmid for the queue */
4668 mqd->cp_hqd_vmid = 0;
4670 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4671 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4672 mqd->cp_hqd_persistent_state = tmp;
4675 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4676 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4677 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4678 mqd->cp_hqd_ib_control = tmp;
4680 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4681 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4682 mqd->cp_hqd_iq_timer = tmp;
4684 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4685 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4686 mqd->cp_hqd_ctx_save_control = tmp;
4689 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4690 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4691 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4692 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4693 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4694 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4695 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4696 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4697 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4698 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4699 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4700 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4701 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4702 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4703 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4705 /* activate the queue */
4706 mqd->cp_hqd_active = 1;
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;
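	/* The MQD is laid out as a flat array of register values, so an
	 * absolute register offset minus mmCP_MQD_BASE_ADDR indexes straight
	 * into the struct in the loops below. */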
	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);
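	/* The KIQ uses the backup slot just past the compute rings: on GPU
	 * reset the saved MQD image is restored and re-committed, otherwise a
	 * fresh MQD is built and a backup copy kept for later resets. */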
	if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
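		/* The LOWER/UPPER pair bounds the doorbell window routed to
		 * the MEC (KIQ through MEC ring 7); the << 2 converts a
		 * 32-bit doorbell index into a byte offset. */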
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* test the KIQ ring first */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* then the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}
done:
	return r;
}
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								AMDGPU_UCODE_ID_CP_MEC1);
			}
			if (r)
				return -EINVAL;
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	adev->gfx.in_suspend = true;
	return gfx_v8_0_hw_fini(adev);
}

static int gfx_v8_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gfx_v8_0_hw_init(adev);
	adev->gfx.in_suspend = false;
	return r;
}
static bool gfx_v8_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(handle))
			return 0;

		udelay(1);
	}
	return -ETIMEDOUT;
}
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
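		/* GFX_STALL/GFX_CLEAR hold off GFX memory traffic while the
		 * soft reset bits are pulsed below; they are released again
		 * once both resets are done. */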
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
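	/* The write above latches the 64-bit counter so the two 32-bit reads
	 * below form a consistent snapshot; the mutex keeps a concurrent
	 * caller from re-latching between the reads. */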
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
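	/* (1 << (oa_size + oa_base)) - (1 << oa_base) builds a contiguous
	 * mask of oa_size bits starting at bit oa_base, i.e. this VMID's
	 * slice of the ordered-append units. */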
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
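	/* AUTO_INCR advances SQ_IND_INDEX on every SQ_IND_DATA read, so the
	 * loop below streams num consecutive registers without touching the
	 * index register again. */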
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							 bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}

static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}

	return 0;
}
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
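	/* Broadcast SE/SH select plus all-ones CU/non-CU master masks make
	 * the serdes write below reach the BPM registers of every CU at
	 * once. */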
	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);
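		/* CMD=1 with MESSAGE=1 asks the RLC firmware to enter safe
		 * mode; the RLC acknowledges by clearing the CMD bit, which
		 * the second poll below waits for. */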
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * ===  MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * ===  CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CP,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}
static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}

static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		/* XXX check if swapping is necessary on BE */
		return ring->adev->wb.wb[ring->wptr_offs];
	else
		return RREG32(mmCP_RB0_WPTR);
}

static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}

static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);
}
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
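	/* The remaining dw2 bits select which state blocks CONTEXT_CONTROL
	 * asks the CP to reload (global config, CS sh regs, per-context
	 * state, CE RAM), set below depending on the switch type. */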
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble
		 * presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
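	/* If the write pointer has wrapped past the patch location, add the
	 * ring size (in dwords) so the skip count stays positive. */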
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
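	/* ring_id packs the source queue: bits [3:2] = ME, bits [1:0] = pipe,
	 * bits [6:4] = queue; ME 0 is the gfx ring, the MECs are routed to
	 * the matching compute ring below. */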
	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		   me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
		       prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
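
/*
 * The per-packet costs in .emit_frame_size above sum to 150 dwords; with
 * 16 chained IBs at .emit_ib_size (4 dwords each) the reservation comes
 * to roughly the "maximum 215dw" quoted in the leading comment.
 */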
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
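
/*
 * The KIQ is the only ring that provides emit_rreg/emit_wreg, letting
 * register reads and writes be tunneled through ring packets; this is
 * the sanctioned register-access path when running under SR-IOV
 * virtualization, where direct MMIO access by the guest is restricted.
 */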
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
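
/*
 * All VI variants share the Iceland RLC callbacks (RLC safe-mode
 * enter/exit); no ASIC-specific RLC function table is needed here.
 */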
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
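
/*
 * GDS sizing, from the branches above: a 64KB GDS is split into 4KB
 * gfx/cs memory partitions with 4 GWS slots each (and an asymmetric
 * 4/1 OA split), while larger GDS configurations get 1KB memory
 * partitions, 16 GWS slots and a symmetric 4/4 OA split.
 */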
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
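
/*
 * Worked example: with max_cu_per_sh == 8, amdgpu_gfx_create_bitmask()
 * returns 0xff; if INACTIVE_CUS reads back 0x03 (CUs 0 and 1 fused off
 * or disabled), the function returns ~0x03 & 0xff == 0xfc, i.e. CUs 2-7
 * are active.
 */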
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
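
/*
 * ao_cu_mask packs one byte per shader array: the shift (i * 16 + j * 8)
 * places SE0/SH0 in bits 7:0, SE0/SH1 in bits 15:8, SE1/SH0 in bits
 * 23:16 and SE1/SH1 in bits 31:24, which is why only the first two SEs
 * and SHs contribute to the 32-bit mask.
 */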
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	/* cnt is the PM4 count field: payload dwords plus the 4 header/
	 * control/address dwords, minus the standard 2-dword PACKET3 bias.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
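
/*
 * Both metadata writers target the context save area (CSA) at the top of
 * the reserved VA range: the CSA sits at AMDGPU_VA_RESERVED_SIZE - 2 * 4096,
 * and gfx_v8_0_ring_emit_de_meta() below places the GDS backup page 4096
 * bytes after it.
 */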
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}