/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

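/* Helpers for assembling GB_TILE_MODEn / GB_MACROTILE_MODEn values: each
 * macro shifts a field value into its position within the 32-bit tiling
 * registers.
 */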
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum bpm_reg {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

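/* Per-VMID GDS register offsets: partition base and size plus the GWS and
 * OA registers for each of the 16 VMIDs.
 */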
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

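/* The "golden" register tables below are {register, AND mask, OR value}
 * triplets consumed by amdgpu_program_register_sequence(): the masked bits
 * are cleared and the OR value is written on top (an all-ones mask simply
 * writes the value directly).
 */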
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

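/* Program the per-ASIC "golden" register settings at init time. Polaris10
 * additionally gets an ACLK tweak via the SMC and, on three known board
 * SKUs, a pair of board-specific I2C writes (a hardware quirk).
 */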
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

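/* The GFX block exposes eight generic scratch registers; set up the
 * allocator bitmap so the ring/IB tests below can borrow one.
 */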
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

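/* Basic ring liveness test: seed a scratch register with 0xCAFEDEAD, have
 * the CP write 0xDEADBEEF to it via a SET_UCONFIG_REG packet, then poll
 * until the value lands or adev->usec_timeout expires.
 */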
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

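/* Same idea as the ring test, but the scratch write goes through an
 * indirect buffer so the whole IB submission path (fence included) is
 * exercised.
 */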
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

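/* Drop every firmware reference taken by gfx_v8_0_init_microcode(). */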
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

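/* Fetch and validate all GFX microcode images (PFP/ME/CE/RLC/MEC, plus
 * MEC2 where the ASIC has a second compute microengine), cache the
 * version/feature numbers, and register the blobs with the firmware
 * loader when firmware is loaded through the SMU.
 */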
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the CP jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

1109 "gfx8: Failed to load firmware \"%s\"\n",
1111 release_firmware(adev->gfx.pfp_fw);
1112 adev->gfx.pfp_fw = NULL;
1113 release_firmware(adev->gfx.me_fw);
1114 adev->gfx.me_fw = NULL;
1115 release_firmware(adev->gfx.ce_fw);
1116 adev->gfx.ce_fw = NULL;
1117 release_firmware(adev->gfx.rlc_fw);
1118 adev->gfx.rlc_fw = NULL;
1119 release_firmware(adev->gfx.mec_fw);
1120 adev->gfx.mec_fw = NULL;
1121 release_firmware(adev->gfx.mec2_fw);
1122 adev->gfx.mec2_fw = NULL;
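/* Serialize the clear-state block (CSB) into @buffer as a PACKET3 stream:
 * preamble begin, context control, every SECT_CONTEXT extent from the RLC
 * cs_data tables, the raster config pair, preamble end, and a final
 * CLEAR_STATE.
 */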
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

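/* Copy the CP jump tables for each microengine (CE, PFP, ME, MEC, and
 * MEC2 on Carrizo) out of the firmware images into the RLC-owned cp_table
 * buffer, which the RLC uses when saving/restoring CP state (e.g. for
 * power gating).
 */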
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

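/* Allocate and fill the RLC buffers: a VRAM BO holding the clear-state
 * block built by gfx_v8_0_get_csb_buffer(), and on Carrizo/Stoney an
 * additional BO for the CP jump tables plus a GDS backup area.
 */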
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

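/* Allocate one GTT BO holding an HPD EOP buffer (GFX8_MEC_HPD_SIZE bytes)
 * for every acquired compute queue, and zero it.
 */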
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

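/* Hand-assembled GCN compute shaders for the Carrizo EDC workaround
 * below: the first initializes the VGPRs, the second the SGPRs, so the
 * GPR ECC/EDC state starts from a known-clean value.
 */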
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

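/* Register/value pairs (not triplets) written via SET_SH_REG before each
 * EDC workaround dispatch below: static thread management, thread counts,
 * resource limits, and USER_DATA scratch values for the shaders above.
 */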
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
};

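/* Carrizo EDC workaround: dispatch the GPR-init shaders above once at
 * init so every VGPR/SGPR is written, then enable DED/PROP_FED modes and
 * read the EDC counters back to clear them.
 */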
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

1591 /* write the register state for the compute dispatch */
1592 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1593 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1594 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
1597 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1598 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1599 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1600 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1601 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1602 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1604 /* write dispatch packet */
1605 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1606 ib.ptr[ib.length_dw++] = 8; /* x */
1607 ib.ptr[ib.length_dw++] = 1; /* y */
1608 ib.ptr[ib.length_dw++] = 1; /* z */
1609 ib.ptr[ib.length_dw++] =
1610 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1612 /* write CS partial flush packet */
1613 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1614 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}
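	/* Presumably this arms EDC reporting for the counter reads below:
	 * DED_MODE = 2 selects how double-error-detected events are handled
	 * and PROP_FED propagates fatal ECC errors, while clearing DIS_EDC
	 * in CC_GC_EDC_CONFIG turns error detection back on.
	 */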
	tmp = REG_SET_FIELD(0, GB_EDC_MODE, DED_MODE, 2);
1631 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1632 WREG32(mmGB_EDC_MODE, tmp);
1634 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1635 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1636 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1639 /* read back registers to clear the counters */
1640 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1641 RREG32(sec_ded_counter_registers[i]);
fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
1660 adev->gfx.config.max_shader_engines = 1;
1661 adev->gfx.config.max_tile_pipes = 2;
1662 adev->gfx.config.max_cu_per_sh = 6;
1663 adev->gfx.config.max_sh_per_se = 1;
1664 adev->gfx.config.max_backends_per_se = 2;
1665 adev->gfx.config.max_texture_channel_caches = 2;
1666 adev->gfx.config.max_gprs = 256;
1667 adev->gfx.config.max_gs_threads = 32;
1668 adev->gfx.config.max_hw_contexts = 8;
1670 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1671 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1672 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1673 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
1677 adev->gfx.config.max_shader_engines = 4;
1678 adev->gfx.config.max_tile_pipes = 16;
1679 adev->gfx.config.max_cu_per_sh = 16;
1680 adev->gfx.config.max_sh_per_se = 1;
1681 adev->gfx.config.max_backends_per_se = 4;
1682 adev->gfx.config.max_texture_channel_caches = 16;
1683 adev->gfx.config.max_gprs = 256;
1684 adev->gfx.config.max_gs_threads = 32;
1685 adev->gfx.config.max_hw_contexts = 8;
1687 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1688 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1689 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1690 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
1693 case CHIP_POLARIS11:
1694 case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
1698 adev->gfx.config.max_gprs = 256;
1699 adev->gfx.config.max_gs_threads = 32;
1700 adev->gfx.config.max_hw_contexts = 8;
1702 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1703 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1704 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1705 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
1712 adev->gfx.config.max_gprs = 256;
1713 adev->gfx.config.max_gs_threads = 32;
1714 adev->gfx.config.max_hw_contexts = 8;
1716 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1717 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1718 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1719 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
1723 adev->gfx.config.max_shader_engines = 4;
1724 adev->gfx.config.max_tile_pipes = 8;
1725 adev->gfx.config.max_cu_per_sh = 8;
1726 adev->gfx.config.max_sh_per_se = 1;
1727 adev->gfx.config.max_backends_per_se = 2;
1728 adev->gfx.config.max_texture_channel_caches = 8;
1729 adev->gfx.config.max_gprs = 256;
1730 adev->gfx.config.max_gs_threads = 32;
1731 adev->gfx.config.max_hw_contexts = 8;
1733 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1734 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1735 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1736 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
1740 adev->gfx.config.max_shader_engines = 1;
1741 adev->gfx.config.max_tile_pipes = 2;
1742 adev->gfx.config.max_sh_per_se = 1;
1743 adev->gfx.config.max_backends_per_se = 2;
1744 adev->gfx.config.max_cu_per_sh = 8;
1745 adev->gfx.config.max_texture_channel_caches = 2;
1746 adev->gfx.config.max_gprs = 256;
1747 adev->gfx.config.max_gs_threads = 32;
1748 adev->gfx.config.max_hw_contexts = 8;
1750 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1751 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1752 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1753 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
1757 adev->gfx.config.max_shader_engines = 1;
1758 adev->gfx.config.max_tile_pipes = 2;
1759 adev->gfx.config.max_sh_per_se = 1;
1760 adev->gfx.config.max_backends_per_se = 1;
1761 adev->gfx.config.max_cu_per_sh = 3;
1762 adev->gfx.config.max_texture_channel_caches = 2;
1763 adev->gfx.config.max_gprs = 256;
1764 adev->gfx.config.max_gs_threads = 16;
1765 adev->gfx.config.max_hw_contexts = 8;
1767 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1768 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1769 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1770 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
1774 adev->gfx.config.max_shader_engines = 2;
1775 adev->gfx.config.max_tile_pipes = 4;
1776 adev->gfx.config.max_cu_per_sh = 2;
1777 adev->gfx.config.max_sh_per_se = 1;
1778 adev->gfx.config.max_backends_per_se = 2;
1779 adev->gfx.config.max_texture_channel_caches = 4;
1780 adev->gfx.config.max_gprs = 256;
1781 adev->gfx.config.max_gs_threads = 32;
1782 adev->gfx.config.max_hw_contexts = 8;
1784 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1785 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1786 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1787 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}
1792 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1793 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1794 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1796 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1797 adev->gfx.config.mem_max_burst_length_bytes = 256;
1798 if (adev->flags & AMD_IS_APU) {
1799 /* Get memory bank mapping mode. */
1800 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1801 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1802 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1804 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1805 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1806 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
		/* Validate settings in case only one DIMM is installed. */
1809 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1810 dimm00_addr_map = 0;
1811 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1812 dimm01_addr_map = 0;
1813 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1814 dimm10_addr_map = 0;
1815 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1816 dimm11_addr_map = 0;
		/* If the DIMM address map is 8GB, the row size should be 2KB; otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), use the larger of the two. */
1820 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}
1831 adev->gfx.config.shader_engine_tile_size = 32;
1832 adev->gfx.config.num_gpus = 1;
1833 adev->gfx.config.multi_gpu_tile_size = 64;
1835 /* fix up row size */
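	/* GB_ADDR_CONFIG.ROW_SIZE encodes the row size as 0 = 1KB, 1 = 2KB,
	 * 2 = 4KB, hence the mapping below. */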
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;
1867 ring->ring_obj = NULL;
1868 ring->use_doorbell = true;
1869 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
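	/* each compute ring owns one GFX8_MEC_HPD_SIZE slice of the EOP
	 * buffer that gfx_v8_0_mec_init() allocated: */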
1870 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1871 + (ring_id * GFX8_MEC_HPD_SIZE);
1872 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;
1878 /* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}
static int gfx_v8_0_sw_init(void *handle)
{
1890 int i, j, k, r, ring_id;
1891 struct amdgpu_ring *ring;
1892 struct amdgpu_kiq *kiq;
1893 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}
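	/* each VI MEC exposes 4 pipes with 8 queues each (32 queues per MEC) */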
1911 adev->gfx.mec.num_pipe_per_mec = 4;
1912 adev->gfx.mec.num_queue_per_pipe = 8;
	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;
1924 /* Privileged reg */
1925 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;
1930 /* Privileged inst */
1931 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;
1936 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1938 gfx_v8_0_scratch_init(adev);
	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}
1958 /* set up the gfx ring */
1959 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1960 ring = &adev->gfx.gfx_ring[i];
1961 ring->ring_obj = NULL;
1962 sprintf(ring->name, "gfx");
1963 /* no gfx doorbells on iceland */
1964 if (adev->asic_type != CHIP_TOPAZ) {
1965 ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}
	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev, ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}
	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;
2011 /* reserve GDS, GWS and OA resource for gfx */
2012 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2013 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;
2018 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2019 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;
2024 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2025 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;
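	/* 0x8000 bytes = 32KB of constant engine (CE) RAM */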
2030 adev->gfx.ce_ram_size = 0x8000;
	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2044 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2045 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2046 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2048 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2049 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2050 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2051 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2053 amdgpu_gfx_compute_mqd_sw_fini(adev);
2054 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2055 amdgpu_gfx_kiq_fini(adev);
2057 gfx_v8_0_mec_fini(adev);
2058 gfx_v8_0_rlc_fini(adev);
	gfx_v8_0_free_microcode(adev);

	return 0;
}
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;
2071 modearray = adev->gfx.config.tile_mode_array;
2072 mod2array = adev->gfx.config.macrotile_mode_array;
2074 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2075 modearray[reg_offset] = 0;
2077 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2078 mod2array[reg_offset] = 0;
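	/* Each GB_TILE_MODE<n> word below packs the array mode, pipe config,
	 * tile split and micro tile mode for tiling index n, and each
	 * GB_MACROTILE_MODE<n> word packs the bank width/height, macro tile
	 * aspect and bank count.  Reserved indices (7, plus 12/17/23 on some
	 * parts) stay zero and are skipped by the register write loops.
	 */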
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
2082 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2083 PIPE_CONFIG(ADDR_SURF_P2) |
2084 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2085 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2086 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2087 PIPE_CONFIG(ADDR_SURF_P2) |
2088 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2089 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2090 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2091 PIPE_CONFIG(ADDR_SURF_P2) |
2092 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2093 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2094 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2095 PIPE_CONFIG(ADDR_SURF_P2) |
2096 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2097 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2098 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099 PIPE_CONFIG(ADDR_SURF_P2) |
2100 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2101 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2102 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2103 PIPE_CONFIG(ADDR_SURF_P2) |
2104 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2105 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2106 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2107 PIPE_CONFIG(ADDR_SURF_P2) |
2108 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2109 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2110 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2111 PIPE_CONFIG(ADDR_SURF_P2));
2112 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2113 PIPE_CONFIG(ADDR_SURF_P2) |
2114 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2115 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2116 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2117 PIPE_CONFIG(ADDR_SURF_P2) |
2118 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2119 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2120 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2121 PIPE_CONFIG(ADDR_SURF_P2) |
2122 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2123 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2124 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2125 PIPE_CONFIG(ADDR_SURF_P2) |
2126 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2127 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2128 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2129 PIPE_CONFIG(ADDR_SURF_P2) |
2130 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2131 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2132 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2133 PIPE_CONFIG(ADDR_SURF_P2) |
2134 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2135 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2136 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2137 PIPE_CONFIG(ADDR_SURF_P2) |
2138 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2140 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2141 PIPE_CONFIG(ADDR_SURF_P2) |
2142 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2143 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2144 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2145 PIPE_CONFIG(ADDR_SURF_P2) |
2146 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2147 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2148 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2149 PIPE_CONFIG(ADDR_SURF_P2) |
2150 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2151 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2152 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2153 PIPE_CONFIG(ADDR_SURF_P2) |
2154 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2156 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2157 PIPE_CONFIG(ADDR_SURF_P2) |
2158 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2159 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2160 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2161 PIPE_CONFIG(ADDR_SURF_P2) |
2162 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2164 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2165 PIPE_CONFIG(ADDR_SURF_P2) |
2166 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2167 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2168 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2169 PIPE_CONFIG(ADDR_SURF_P2) |
2170 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2172 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2173 PIPE_CONFIG(ADDR_SURF_P2) |
2174 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2175 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2176 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2177 PIPE_CONFIG(ADDR_SURF_P2) |
2178 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2179 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2180 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2181 PIPE_CONFIG(ADDR_SURF_P2) |
2182 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2185 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2188 NUM_BANKS(ADDR_SURF_8_BANK));
2189 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2190 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2191 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2192 NUM_BANKS(ADDR_SURF_8_BANK));
2193 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2194 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2195 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2196 NUM_BANKS(ADDR_SURF_8_BANK));
2197 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2199 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2200 NUM_BANKS(ADDR_SURF_8_BANK));
2201 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2202 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2203 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2204 NUM_BANKS(ADDR_SURF_8_BANK));
2205 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2206 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2207 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2208 NUM_BANKS(ADDR_SURF_8_BANK));
2209 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2210 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2211 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2212 NUM_BANKS(ADDR_SURF_8_BANK));
2213 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2214 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2215 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2216 NUM_BANKS(ADDR_SURF_16_BANK));
2217 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2218 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2219 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2220 NUM_BANKS(ADDR_SURF_16_BANK));
2221 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2222 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2224 NUM_BANKS(ADDR_SURF_16_BANK));
2225 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2226 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2227 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2228 NUM_BANKS(ADDR_SURF_16_BANK));
2229 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2230 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2231 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2232 NUM_BANKS(ADDR_SURF_16_BANK));
2233 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2235 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2236 NUM_BANKS(ADDR_SURF_16_BANK));
2237 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2239 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240 NUM_BANKS(ADDR_SURF_8_BANK));
2242 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
2245 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2247 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2248 if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
2253 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2255 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2256 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2257 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2259 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2260 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2261 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2262 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2263 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2264 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2265 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2266 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2267 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2268 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2269 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2271 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2272 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2273 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2274 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2275 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2276 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2277 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2278 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2279 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2280 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2281 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2282 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2283 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2284 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2285 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2286 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2287 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2288 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2289 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2291 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2293 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2295 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2296 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2297 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2299 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2300 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2301 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2303 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2304 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2307 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2309 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2311 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2312 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2313 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2315 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2316 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2319 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2320 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2321 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2323 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2324 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2326 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2327 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2328 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2330 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2331 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2332 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2335 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2336 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2338 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2339 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2340 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2343 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2344 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2345 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2347 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2348 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2351 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2352 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2355 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2356 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2359 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2360 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2364 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2368 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2370 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2371 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2372 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2373 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2374 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2376 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2379 NUM_BANKS(ADDR_SURF_8_BANK));
2380 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2382 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2383 NUM_BANKS(ADDR_SURF_8_BANK));
2384 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2387 NUM_BANKS(ADDR_SURF_8_BANK));
2388 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2390 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2391 NUM_BANKS(ADDR_SURF_8_BANK));
2392 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2394 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2395 NUM_BANKS(ADDR_SURF_8_BANK));
2396 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2398 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2399 NUM_BANKS(ADDR_SURF_8_BANK));
2400 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2402 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2403 NUM_BANKS(ADDR_SURF_8_BANK));
2404 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2407 NUM_BANKS(ADDR_SURF_8_BANK));
2408 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2410 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2411 NUM_BANKS(ADDR_SURF_8_BANK));
2412 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2414 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2415 NUM_BANKS(ADDR_SURF_8_BANK));
2416 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2419 NUM_BANKS(ADDR_SURF_8_BANK));
2420 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2423 NUM_BANKS(ADDR_SURF_8_BANK));
2424 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2426 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2427 NUM_BANKS(ADDR_SURF_8_BANK));
2428 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2430 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2431 NUM_BANKS(ADDR_SURF_4_BANK));
2433 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2434 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2436 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2437 if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
2442 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2444 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2446 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2448 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2449 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2450 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2452 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2453 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2454 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2456 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2457 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2458 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2459 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2460 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2461 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2462 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2463 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2464 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2465 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2466 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2467 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2468 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2469 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2471 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2472 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2473 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2474 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2475 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2476 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2477 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2478 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2481 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2484 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2485 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2488 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2489 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2490 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2492 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2493 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2494 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2497 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2498 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2500 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2501 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2502 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2503 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2504 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2505 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2508 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2509 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2510 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2511 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2512 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2513 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2515 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2516 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2517 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2519 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2520 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2521 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2523 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2524 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2525 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2527 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2528 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2529 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2531 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2532 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2533 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2534 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2536 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2537 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2540 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2541 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2544 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2545 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2548 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2553 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2555 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2556 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2557 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2559 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2560 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2561 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2562 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2563 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2565 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2567 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2568 NUM_BANKS(ADDR_SURF_16_BANK));
2569 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2571 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2572 NUM_BANKS(ADDR_SURF_16_BANK));
2573 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2575 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2576 NUM_BANKS(ADDR_SURF_16_BANK));
2577 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2579 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2580 NUM_BANKS(ADDR_SURF_16_BANK));
2581 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2583 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2584 NUM_BANKS(ADDR_SURF_16_BANK));
2585 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2587 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2588 NUM_BANKS(ADDR_SURF_16_BANK));
2589 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2591 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2592 NUM_BANKS(ADDR_SURF_16_BANK));
2593 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2595 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2596 NUM_BANKS(ADDR_SURF_16_BANK));
2597 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2599 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2600 NUM_BANKS(ADDR_SURF_16_BANK));
2601 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2603 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2604 NUM_BANKS(ADDR_SURF_16_BANK));
2605 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2607 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2608 NUM_BANKS(ADDR_SURF_16_BANK));
2609 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2611 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2612 NUM_BANKS(ADDR_SURF_8_BANK));
2613 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2615 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2616 NUM_BANKS(ADDR_SURF_4_BANK));
2617 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2618 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2619 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2620 NUM_BANKS(ADDR_SURF_4_BANK));
2622 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2623 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2625 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2626 if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2630 case CHIP_POLARIS11:
2631 case CHIP_POLARIS12:
2632 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2633 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2634 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2635 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2636 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2637 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2638 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2639 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2640 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2642 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2644 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2646 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2648 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2650 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2651 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2652 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2653 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2654 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2655 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2656 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2657 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2658 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2659 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2660 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2661 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2662 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2663 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2664 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2665 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2666 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2667 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2669 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2670 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2671 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2673 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2674 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2675 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2677 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2678 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2679 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2680 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2681 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2682 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2683 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2684 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2685 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2686 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2689 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2690 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2691 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2693 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2694 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2695 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2697 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2698 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2699 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2701 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2702 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2703 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2705 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2706 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2707 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2709 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2710 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2711 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2713 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2714 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2715 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2717 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2718 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2719 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2721 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2722 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2723 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2725 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2726 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2727 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2730 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2731 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2733 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2734 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2735 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2737 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2738 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2739 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2741 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2743 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2744 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2745 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2747 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2748 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2749 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2750 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2751 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2753 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2755 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2757 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2758 NUM_BANKS(ADDR_SURF_16_BANK));
2760 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2762 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2763 NUM_BANKS(ADDR_SURF_16_BANK));
2765 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2766 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2767 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2768 NUM_BANKS(ADDR_SURF_16_BANK));
2770 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2772 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2773 NUM_BANKS(ADDR_SURF_16_BANK));
2775 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2776 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2777 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2778 NUM_BANKS(ADDR_SURF_16_BANK));
2780 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2782 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783 NUM_BANKS(ADDR_SURF_16_BANK));
2785 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2787 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2788 NUM_BANKS(ADDR_SURF_16_BANK));
2790 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2791 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2792 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2793 NUM_BANKS(ADDR_SURF_16_BANK));
2795 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2796 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2797 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2798 NUM_BANKS(ADDR_SURF_16_BANK));
2800 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2801 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2802 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2803 NUM_BANKS(ADDR_SURF_16_BANK));
2805 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2806 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2807 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2808 NUM_BANKS(ADDR_SURF_16_BANK));
2810 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2812 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2813 NUM_BANKS(ADDR_SURF_16_BANK));
2815 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2816 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2817 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2818 NUM_BANKS(ADDR_SURF_8_BANK));
2820 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2823 NUM_BANKS(ADDR_SURF_4_BANK));
2825 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2826 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2828 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2829 if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2833 case CHIP_POLARIS10:
2834 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2835 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2836 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2837 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2838 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2840 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2841 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2842 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2843 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2844 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2845 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2846 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2847 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2848 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2849 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2850 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2851 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2852 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2853 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2855 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2856 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2857 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2858 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2859 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2860 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2861 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2862 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2863 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2864 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2865 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2866 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2867 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2868 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2869 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2870 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2871 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2872 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2874 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2875 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2876 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2877 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2878 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2880 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2881 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2882 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2883 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2884 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2885 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2886 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2887 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2888 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2890 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2891 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2892 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2893 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2894 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2895 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2896 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2897 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2899 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2900 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2901 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2902 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2903 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2904 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2905 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2907 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2908 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2909 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2911 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2912 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2913 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2915 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2916 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2917 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2919 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2920 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2921 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2923 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2924 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2925 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2927 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2928 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2929 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2930 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2931 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2932 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2933 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2934 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2935 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2936 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2937 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2939 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2940 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2943 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2945 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2946 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2947 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2948 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2949 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2950 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2951 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2952 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2953 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2954 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2955 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
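	/*
	 * Illustrative note: every modearray[]/mod2array[] entry above and
	 * below is just a precomputed 32-bit register image; the
	 * ARRAY_MODE()/PIPE_CONFIG()/BANK_WIDTH()/... helpers shift an enum
	 * into its GB_TILE_MODE0 / GB_MACROTILE_MODE0 bitfield, e.g. (roughly)
	 *
	 *	modearray[20] = (ARRAY_2D_TILED_THICK << ARRAY_MODE shift) |
	 *			(pipe config enum     << PIPE_CONFIG shift) | ...
	 *
	 * mod2array[7] is deliberately never initialized, which is why the
	 * macrotile write loops skip reg_offset 7.
	 */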
	case CHIP_STONEY:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
3208 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	}
}
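/*
 * gfx_v8_0_select_se_sh() steers subsequent GRBM register accesses: writing
 * GRBM_GFX_INDEX picks one shader engine (SE), shader array (SH) and
 * instance, or broadcasts when a field is passed as 0xffffffff.  The usual
 * (illustrative) calling pattern, with adev->grbm_idx_mutex held, is:
 *
 *	gfx_v8_0_select_se_sh(adev, se, sh, 0xffffffff);
 *	...per-SE/SH register reads/writes...
 *	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 */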
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
				  u32 se_num, u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, data);
}
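/*
 * Worked example for the bitmap math below: with 4 render backends per SE
 * and one SH per SE, amdgpu_gfx_create_bitmask(4) yields 0xf; if the harvest
 * fuses report BACKEND_DISABLE = 0b0010, the function returns
 * ~0b0010 & 0xf = 0b1101, i.e. RBs 0, 2 and 3 are usable.
 */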
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_RB_BACKEND_DISABLE) |
	       RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
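/*
 * When render backends are harvested, the POR raster config computed above
 * would reference disabled RBs, so it is rewritten per shader engine to map
 * work only onto RBs that survived.  E.g. (hypothetical numbers) with
 * num_rb = 4 and num_se = 2, rb_per_se is 2 and se_mask[] carves rb_mask
 * into one 2-bit slice per SE before the per-SE fixups below.
 */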
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
	    adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMID SH_MEM registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the compute VMID sh_mem registers (aperture bases and memory
 * configuration).
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:     0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:   0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
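/*
 * SH_MEM_BASES packs two 16-bit aperture selectors (private/scratch base in
 * the low half, shared/LDS base in the high half), each providing bits 63:48
 * of the FSA64 address.  With DEFAULT_SH_MEM_BASES = 0x6000 the value
 * written above works out to 0x60006000, i.e. both apertures land at
 * 0x6000xxxx'xxxxxxxx, matching the aperture comment in
 * gfx_v8_0_init_compute_vmid().
 */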
static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	default:
		adev->gfx.config.double_offchip_lds_buf = 1;
		break;
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		adev->gfx.config.double_offchip_lds_buf = 0;
		break;
	}
}
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
					  SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  INDEX_STRIDE, 3);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcast
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
		PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
		PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
		PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
		PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);
}
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
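/*
 * REG_SET_FIELD(orig, REG, FIELD, val) is the standard amdgpu
 * read-modify-write helper, roughly:
 *
 *	(orig & ~REG__FIELD_MASK) | ((val << REG__FIELD__SHIFT) & REG__FIELD_MASK)
 *
 * gfx_v8_0_enable_gui_idle_interrupt() below uses it to flip four interrupt
 * enables in CP_INT_CNTL_RING0 with a single register write.
 */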
static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
	       adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
	       adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
	       adev->gfx.rlc.clear_state_size);
}
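/*
 * gfx_v8_0_parse_ind_reg_list() below walks the RLC register-list-format
 * blob: entries are terminated by 0xFFFFFFFF markers, and for each entry the
 * word at offset +2 names an indexed register.  That word is replaced with a
 * compact index into unique_indices[], which is later programmed into the
 * RLC_SRM_INDEX_CNTL_ADDR_n/DATA_n pairs (see
 * gfx_v8_0_init_save_restore_list()).
 */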
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
					int ind_offset,
					int list_size,
					int *unique_indices,
					int *indices_count,
					int max_indices,
					int *ind_start_offsets,
					int *offset_count,
					int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
		     indices < *indices_count;
		     indices++) {
			if (unique_indices[indices] ==
			    register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	u32 i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;
	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);

	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
	       adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				    RLC_FormatDirectRegListLength,
				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				    unique_indices,
				    &indices_count,
				    sizeof(unique_indices) / sizeof(int),
				    indirect_start_offsets,
				    &offset_count,
				    sizeof(indirect_start_offsets) / sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
	       adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets) / sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
		       indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}

static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}
}
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo enables the cp interrupt only after the cp is initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
		   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}
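/*
 * All VI microcode uploads follow the same pattern used below for PFP, CE
 * and ME (and later for the MECs and RLC): write 0 to the engine's
 * UCODE_ADDR register, stream the image one dword at a time through
 * UCODE_DATA (the address auto-increments), then write the firmware version
 * back to UCODE_ADDR.
 */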
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
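/*
 * The count above mirrors the packets emitted by gfx_v8_0_cp_gfx_start()
 * below: 2 dwords for the PREAMBLE begin, 3 for CONTEXT_CONTROL,
 * 2 + reg_count per SECT_CONTEXT extent, 4 for the raster config pair, 2 for
 * the PREAMBLE end and 2 for CLEAR_STATE.  The ring allocation there uses
 * this size + 4, the extra 4 dwords being the SET_BASE packet for the CE
 * partitions; keep the two functions in sync when touching either.
 */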
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				  0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
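/*
 * Doorbell plumbing for the gfx ring: CP_RB_DOORBELL_CONTROL selects the
 * ring's doorbell offset and enables it, while the RANGE_LOWER/UPPER pair
 * (dGPU only; APUs return early below) tells the CP which slice of the
 * doorbell BAR belongs to the gfx rings.
 */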
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;

	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			    DOORBELL_RANGE_LOWER,
			    AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
	       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}

static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}

static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
/* KIQ functions */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
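/*
 * RLC_CP_SCHEDULERS identifies the KIQ by its hardware queue coordinates
 * packed as (me << 5) | (pipe << 3) | queue in the low byte; e.g. a KIQ on
 * ME 1, pipe 1, queue 0 (hypothetical placement) packs to
 * (1 << 5) | (1 << 3) | 0 = 0x28.  The follow-up write with bit 0x80 set
 * appears to act as the enable/valid flag for that entry.
 */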
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4565 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4567 struct amdgpu_device *adev = ring->adev;
4568 struct vi_mqd *mqd = ring->mqd_ptr;
4569 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4572 mqd->header = 0xC0310800;
4573 mqd->compute_pipelinestat_enable = 0x00000001;
4574 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4575 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4576 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4577 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4578 mqd->compute_misc_reserved = 0x00000003;
4579 if (!(adev->flags & AMD_IS_APU)) {
4580 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4581 + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
4582 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4583 + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
4585 eop_base_addr = ring->eop_gpu_addr >> 8;
4586 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4587 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4589 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4590 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4591 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4592 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4594 mqd->cp_hqd_eop_control = tmp;
4596 /* enable doorbell? */
4597 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4598 CP_HQD_PQ_DOORBELL_CONTROL,
4600 ring->use_doorbell ? 1 : 0);
4602 mqd->cp_hqd_pq_doorbell_control = tmp;
4604 /* set the pointer to the MQD */
4605 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}

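/*
 * gfx_v8_0_mqd_commit - program an HQD from a prepared MQD image
 *
 * Caller is expected to have selected the target me/pipe/queue via SRBM.
 * The MQD dwords are copied into the matching CP_HQD_* registers one at a
 * time, and CP_HQD_ACTIVE is written last so the queue only goes live once
 * the rest of its state is in place.
 */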
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}

static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}

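/*
 * gfx_v8_0_kcq_init_queue - prepare the MQD for one user compute queue
 *
 * On first init the MQD is built from scratch under SRBM selection and a
 * backup copy is stashed; on GPU reset the backup is restored and the ring
 * buffer cleared; on resume from suspend only the ring buffer is cleared,
 * since the MQD contents were preserved.
 */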
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}

static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}

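/*
 * gfx_v8_0_kiq_resume - bring up the KIQ and all user compute queues
 *
 * Initializes the KIQ MQD first, then each KCQ MQD, opens the MEC doorbell
 * aperture, asks the KIQ to map the compute queues, and finally ring-tests
 * everything, marking rings not ready on failure.
 */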
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}

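/*
 * gfx_v8_0_cp_resume - restart the command processors
 *
 * Depending on the firmware load path, either loads the CP microcode
 * directly (legacy path) or verifies that the SMU finished loading it,
 * then brings up the gfx ring and the KIQ/compute queues.
 */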
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}

static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}

static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}

static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	amdgpu_set_powergating_state(adev,
				     AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}

static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.in_suspend = true;
	return gfx_v8_0_hw_fini(adev);
}

static int gfx_v8_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gfx_v8_0_hw_init(adev);
	adev->gfx.in_suspend = false;
	return r;
}

static bool gfx_v8_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(handle))
			return 0;

		udelay(1);
	}
	return -ETIMEDOUT;
}

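/*
 * gfx_v8_0_check_soft_reset - derive the GRBM/SRBM soft-reset masks
 *
 * Inspects the GRBM/SRBM status registers for busy blocks and caches the
 * resulting reset masks in adev->gfx for the pre/soft/post reset callbacks
 * below. Returns true if a soft reset is required.
 */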
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}

static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}

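/*
 * gfx_v8_0_soft_reset - pulse the cached GRBM/SRBM soft-reset bits
 *
 * GFX is stalled via GMCON_DEBUG around the reset pulse, and each reset
 * register is read back after every write so the write is posted before
 * the following delay.
 */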
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}

static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}

/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}

static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}

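/*
 * SQ indirect register access: SQ_IND_INDEX selects a (simd, wave, address)
 * tuple and SQ_IND_DATA returns the value; the AUTO_INCR mode allows bulk
 * reads of consecutive wave registers, which the SGPR dump below relies on.
 */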
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}

static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};

static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}

static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
				     AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}

static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (enable)
		RREG32(mmDB_RENDER_CONTROL);
}

static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}

static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}

	return 0;
}

static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}

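/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM serdes command to all CUs
 *
 * Selects all SEs/SHs, opens the CU and non-CU master masks, then writes
 * the command/register pair into RLC_SERDES_WR_CTRL. On Stoney the BPM
 * data and address fields are left untouched when clearing the control
 * bits.
 */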
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}

#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e

static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}

static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};

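/*
 * Medium grain clock gating (MGCG/MGLS): performed under RLC safe mode.
 * The enable path programs RLC/CP memory light sleep and clears the MGCG
 * override via the serdes interface; the disable path does the reverse in
 * the opposite order.
 */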
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

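/*
 * Coarse grain clock gating (CGCG/CGLS): also driven through
 * RLC_CGTT_MGCG_OVERRIDE and BPM serdes commands. GUI idle interrupts are
 * kept enabled while CGCG is active so the RLC observes idle/busy
 * transitions, and disabled again while the overrides are being changed.
 */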
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * ===  MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * ===  CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}

static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}

static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CP,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}

static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}

static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}

static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		/* XXX check if swapping is necessary on BE */
		return ring->adev->wb.wb[ring->wptr_offs];
	else
		return RREG32(mmCP_RB0_WPTR);
}

static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}

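/*
 * gfx_v8_0_ring_emit_hdp_flush - flush HDP writes via a WAIT_REG_MEM packet
 *
 * Writes GPU_HDP_FLUSH_REQ and polls GPU_HDP_FLUSH_DONE for the bit that
 * corresponds to this ring's CP engine (per-pipe for compute/KIQ, CP0 on
 * the PFP for gfx).
 */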
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}

static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}

static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);
}

static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

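/*
 * gfx_v8_0_ring_emit_vm_flush - update a VMID's page table base and flush
 *
 * Writes the page directory address into the per-VMID base register
 * (contexts 0-7 and 8-15 live in separate register banks), requests a TLB
 * invalidate for that VMID, and waits on VM_INVALIDATE_REQUEST until the
 * invalidation completes.
 */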
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

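/*
 * COND_EXEC support for preemption: init_cond_exec emits a COND_EXEC packet
 * with a placeholder DW count (0x55aa55aa) and returns its ring offset;
 * patch_cond_exec later rewrites that slot with the real number of dwords
 * to skip, handling wrap-around of the ring buffer.
 */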
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

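/*
 * gfx_v8_0_eop_irq - fence processing for CP EOP interrupts
 *
 * The me/pipe/queue are decoded from the IV entry's ring_id; me 0 is the
 * gfx ring, me 1/2 are matched against the compute rings.
 */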
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		7 + /* PIPELINE_SYNC */
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
		       prior to this double SWITCH_BUFFER */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
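
/*
 * Ring callbacks for the compute (MEC) rings.  Compute rings carry no
 * graphics context switch, so there is no COND_EXEC/SWITCH_BUFFER
 * handling; the frame budget only covers GDS switch, HDP flush and
 * invalidate, pipeline sync, VM flush and the fences.
 */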
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
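
/*
 * Ring callbacks for the kernel interface queue (KIQ).  On top of the
 * usual compute entry points it provides emit_rreg/emit_wreg, which let
 * the driver read and write registers through the ring itself, e.g. when
 * direct MMIO access is not possible under SR-IOV virtualization.
 */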
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
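
/*
 * Attach the matching callback table to each ring: the single KIQ ring,
 * then every gfx and compute ring.
 */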
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
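
/*
 * Register the interrupt sources: per-type EOP interrupts, the
 * single-type privileged register/instruction faults, and the KIQ
 * GENERIC2 interrupt.
 */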
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
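
/*
 * Initialize the ASIC GDS (global data share) partition sizes.  The sizes
 * amount to an even split across the 16 VMIDs: a 64 KB GDS gives
 * 64 * 1024 / 16 = 4096 bytes per gfx/CS partition (with 64 / 16 = 4 GWS
 * and 16 / 16 = 1 OA per CS partition); any other GDS size falls back to
 * the 1024-byte layout.
 */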
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
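
/*
 * Apply the user-requested CU disable mask for the currently selected
 * SE/SH by programming the INACTIVE_CUS field.
 */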
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
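
/*
 * Build the compute-unit info: walk every shader engine / shader array,
 * apply the user CU disable mask, and record the per-SH active-CU bitmap
 * plus the "always on" (AO) CU mask.  On APUs only the first 2 active CUs
 * per SH count as always on; on dGPUs all of them do.
 */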
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
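
/*
 * Emit a WRITE_DATA packet that stores the CE metadata at the ce_payload
 * slot of the gfx meta data kept in the reserved CSA address range.  The
 * payload size and offset depend on whether chained IBs are supported.
 */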
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
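
/*
 * Same as the CE variant, but for the DE metadata: besides the payload
 * address, the GDS backup address (one page past the CSA base) is filled
 * into the payload before it is written out.
 */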
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}