/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"
#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048
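/*
 * GFX8_MEC_HPD_SIZE is the per-queue EOP buffer size in bytes;
 * gfx_v8_0_mec_init() below allocates num_compute_rings * GFX8_MEC_HPD_SIZE
 * for the shared HPD EOP buffer object.
 */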
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)			((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)			((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)			((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)		((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)			((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)			((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)			((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)		((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)			((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0
/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
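/*
 * The golden-settings tables below are {register, and_mask, or_value}
 * triplets consumed three dwords at a time by
 * amdgpu_program_register_sequence(); conceptually (a sketch, not the
 * helper's exact body):
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~and_mask;
 *	tmp |= or_value;
 *	WREG32(reg, tmp);
 */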
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
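/*
 * Program the per-ASIC "golden" register settings: first the MGCG/CGCG
 * clockgating defaults (where a table exists for the ASIC), then the
 * a10/a11 revision fixups, then the common GRBM/PA/SPI configuration.
 */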
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
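/*
 * Basic sanity test for a ring: write 0xCAFEDEAD to a scratch register,
 * submit a SET_UCONFIG_REG packet that stores 0xDEADBEEF to the same
 * register, then poll until the new value appears or adev->usec_timeout
 * expires.
 */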
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
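/*
 * Same scratch-register handshake as gfx_v8_0_ring_test_ring(), but driven
 * through an indirect buffer and a fence so that IB submission and fence
 * signaling are exercised as well.
 */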
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
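/*
 * Fetch and validate the PFP, ME, CE, RLC, MEC (and, where present, MEC2)
 * firmware images for the detected ASIC, record version/feature info, and,
 * when the firmware is loaded by the SMU, fill in the ucode table entries
 * and total firmware size.
 */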
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
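	/*
	 * At this point register_list_format holds the byte-swapped format
	 * array and register_restore (which aliases the tail of the same
	 * allocation) holds the restore values; both are consumed later when
	 * the RLC save/restore list is programmed.
	 */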
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the JT (jump table) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
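/*
 * Pack the jump tables of the CE, PFP, ME, MEC (and on Carrizo MEC2)
 * microcode images back to back into the RLC cp_table buffer; me indexes
 * the five firmware sources in that order.
 */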
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}
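/*
 * Allocate, pin, and fill the RLC clear-state buffer (CSB) in VRAM, and on
 * Carrizo/Stoney additionally set up the CP jump-table buffer used by the
 * RLC.
 */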
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}
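/*
 * Allocate and zero the HPD EOP buffer shared by all acquired compute
 * queues; each queue gets GFX8_MEC_HPD_SIZE bytes of it.
 */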
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev, mec_hpd_size, PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
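/*
 * Hand-assembled GFX8 compute shaders used by the EDC GPR workaround
 * below: the first writes a pattern across the VGPRs, the second across
 * the SGPRs, so that all GPRs are initialized before error counting
 * starts.
 */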
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
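/*
 * Carrizo EDC GPR workaround: dispatch the VGPR-init shader once and the
 * SGPR-init shader twice (SE0 CU mask 0x0f, then 0xf0, covering both CU
 * halves); the SEC/DED counter registers listed above are then read back
 * to clear them.
 */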
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);
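	/*
	 * IB size: each register pair in the *_init_regs tables costs three
	 * dwords (SET_SH_REG header + offset + value), plus 4 dwords to set
	 * COMPUTE_PGM_LO/HI, 5 for the DISPATCH_DIRECT packet, and 2 for the
	 * CS-partial-flush EVENT_WRITE; dwords * 4 gives bytes.
	 */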
1604 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1606 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1608 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1609 total_size = ALIGN(total_size, 256);
1610 vgpr_offset = total_size;
1611 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1612 sgpr_offset = total_size;
1613 total_size += sizeof(sgpr_init_compute_shader);
1615 /* allocate an indirect buffer to put the commands in */
1616 memset(&ib, 0, sizeof(ib));
1617 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1619 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
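
	/*
	 * Three dispatches follow, one per register table above.  Each
	 * emits SET_SH_REG packets for its table, points COMPUTE_PGM_LO/HI
	 * at the shader (256-byte aligned, hence the >> 8), fires an
	 * 8x1x1 DISPATCH_DIRECT and fences it with a CS_PARTIAL_FLUSH
	 * event (EVENT_TYPE(7)).
	 */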
	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
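
/*
 * Fill in adev->gfx.config for the detected ASIC and derive the final
 * GB_ADDR_CONFIG value: start from the per-chip golden setting (or the
 * atombios gfx info on Polaris parts) and patch in the DRAM row size.
 */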
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;
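
	/*
	 * GB_ADDR_CONFIG.ROW_SIZE encodes the row size as a power of two:
	 * 0 -> 1KB, 1 -> 2KB, 2 -> 4KB, which is exactly the mapping the
	 * switch below applies to mem_row_size_in_kb.
	 */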
	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
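
/*
 * One-time setup for a compute ring: pick its doorbell, point it at its
 * slice of the shared EOP buffer and route its EOP interrupt by MEC/pipe.
 */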
static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			+ (ring_id * GFX8_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			&adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}
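
/*
 * sw_init: register the GFX interrupt sources (KIQ 178, EOP 181,
 * privileged reg 184, privileged inst 185 on the legacy IH client),
 * load microcode, create the RLC/MEC/KIQ objects and all gfx and
 * compute rings, and reserve the GDS, GWS and OA partitions.
 */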
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}
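
	/*
	 * The pipe loop is innermost, so consecutive ring_ids land on
	 * different pipes first and only then on deeper queues of the
	 * same pipe -- that is the "horizontal" allocation the comment
	 * below refers to.
	 */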
	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}

static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	gfx_v8_0_free_microcode(adev);

	return 0;
}
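
/*
 * Program the GB_TILE_MODE and GB_MACROTILE_MODE tables with the per-ASIC
 * tiling layouts below; the ARRAY_MODE/PIPE_CONFIG/... macros at the top
 * of the file shift each field into place.  Iceland's write loop skips
 * the tile-mode indices it does not define, and every ASIC skips index 7
 * of the macrotile table.
 */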
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090 NUM_BANKS(ADDR_SURF_16_BANK));
3092 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3093 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3094 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3095 NUM_BANKS(ADDR_SURF_16_BANK));
3097 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3098 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3099 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3100 NUM_BANKS(ADDR_SURF_16_BANK));
3102 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3103 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3104 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3105 NUM_BANKS(ADDR_SURF_16_BANK));
3107 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3108 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3109 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3110 NUM_BANKS(ADDR_SURF_8_BANK));
3112 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3113 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3114 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3115 NUM_BANKS(ADDR_SURF_4_BANK));
3117 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3120 NUM_BANKS(ADDR_SURF_4_BANK));
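/*
 * Commit both tables. GB_TILE_MODE0..30 and GB_MACROTILE_MODE0..15 are
 * linearly indexed from their base registers; macrotile entry 7 is never
 * initialized in mod2array above and is skipped below, so that slot keeps
 * its hardware reset value.
 */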
3122 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3123 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3125 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3126 if (reg_offset != 7)
3127 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3129 break;
3130 case CHIP_STONEY:
3131 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3132 PIPE_CONFIG(ADDR_SURF_P2) |
3133 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3134 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3135 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136 PIPE_CONFIG(ADDR_SURF_P2) |
3137 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3138 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3139 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3140 PIPE_CONFIG(ADDR_SURF_P2) |
3141 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3142 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3143 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3144 PIPE_CONFIG(ADDR_SURF_P2) |
3145 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3146 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3147 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3148 PIPE_CONFIG(ADDR_SURF_P2) |
3149 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3150 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3151 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3152 PIPE_CONFIG(ADDR_SURF_P2) |
3153 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3154 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3155 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3156 PIPE_CONFIG(ADDR_SURF_P2) |
3157 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3159 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3160 PIPE_CONFIG(ADDR_SURF_P2));
3161 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3162 PIPE_CONFIG(ADDR_SURF_P2) |
3163 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3164 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3165 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3166 PIPE_CONFIG(ADDR_SURF_P2) |
3167 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3168 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3169 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3170 PIPE_CONFIG(ADDR_SURF_P2) |
3171 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3172 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3173 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3174 PIPE_CONFIG(ADDR_SURF_P2) |
3175 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3176 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3177 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3178 PIPE_CONFIG(ADDR_SURF_P2) |
3179 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3181 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3182 PIPE_CONFIG(ADDR_SURF_P2) |
3183 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3185 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3186 PIPE_CONFIG(ADDR_SURF_P2) |
3187 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3189 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3190 PIPE_CONFIG(ADDR_SURF_P2) |
3191 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3192 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3193 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3194 PIPE_CONFIG(ADDR_SURF_P2) |
3195 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3197 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3198 PIPE_CONFIG(ADDR_SURF_P2) |
3199 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3200 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3201 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3202 PIPE_CONFIG(ADDR_SURF_P2) |
3203 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3204 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3205 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3206 PIPE_CONFIG(ADDR_SURF_P2) |
3207 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3209 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3210 PIPE_CONFIG(ADDR_SURF_P2) |
3211 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3212 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3213 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3214 PIPE_CONFIG(ADDR_SURF_P2) |
3215 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3216 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3217 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3218 PIPE_CONFIG(ADDR_SURF_P2) |
3219 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3220 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3221 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3222 PIPE_CONFIG(ADDR_SURF_P2) |
3223 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3224 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3225 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3226 PIPE_CONFIG(ADDR_SURF_P2) |
3227 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3228 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3229 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3230 PIPE_CONFIG(ADDR_SURF_P2) |
3231 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3232 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3234 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3235 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3236 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3237 NUM_BANKS(ADDR_SURF_8_BANK));
3238 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3239 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3240 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241 NUM_BANKS(ADDR_SURF_8_BANK));
3242 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3245 NUM_BANKS(ADDR_SURF_8_BANK));
3246 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3247 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3248 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3249 NUM_BANKS(ADDR_SURF_8_BANK));
3250 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3251 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3252 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3253 NUM_BANKS(ADDR_SURF_8_BANK));
3254 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3257 NUM_BANKS(ADDR_SURF_8_BANK));
3258 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3261 NUM_BANKS(ADDR_SURF_8_BANK));
3262 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3263 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3264 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3265 NUM_BANKS(ADDR_SURF_16_BANK));
3266 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3269 NUM_BANKS(ADDR_SURF_16_BANK));
3270 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3271 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3272 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3273 NUM_BANKS(ADDR_SURF_16_BANK));
3274 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3275 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3276 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3277 NUM_BANKS(ADDR_SURF_16_BANK));
3278 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3279 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3280 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3281 NUM_BANKS(ADDR_SURF_16_BANK));
3282 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3283 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3284 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3285 NUM_BANKS(ADDR_SURF_16_BANK));
3286 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3287 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3288 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3289 NUM_BANKS(ADDR_SURF_8_BANK));
3291 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3292 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3293 reg_offset != 23)
3294 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3296 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3297 if (reg_offset != 7)
3298 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3303 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3307 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3308 PIPE_CONFIG(ADDR_SURF_P2) |
3309 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3310 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3311 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3312 PIPE_CONFIG(ADDR_SURF_P2) |
3313 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3314 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3315 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3316 PIPE_CONFIG(ADDR_SURF_P2) |
3317 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3318 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3319 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3320 PIPE_CONFIG(ADDR_SURF_P2) |
3321 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3322 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3323 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3324 PIPE_CONFIG(ADDR_SURF_P2) |
3325 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3326 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3327 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3328 PIPE_CONFIG(ADDR_SURF_P2) |
3329 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3330 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3331 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3332 PIPE_CONFIG(ADDR_SURF_P2) |
3333 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3334 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3335 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3336 PIPE_CONFIG(ADDR_SURF_P2));
3337 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3338 PIPE_CONFIG(ADDR_SURF_P2) |
3339 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3341 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3342 PIPE_CONFIG(ADDR_SURF_P2) |
3343 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3345 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3346 PIPE_CONFIG(ADDR_SURF_P2) |
3347 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3349 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3350 PIPE_CONFIG(ADDR_SURF_P2) |
3351 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3353 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3354 PIPE_CONFIG(ADDR_SURF_P2) |
3355 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3357 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3358 PIPE_CONFIG(ADDR_SURF_P2) |
3359 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3361 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3362 PIPE_CONFIG(ADDR_SURF_P2) |
3363 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3365 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3366 PIPE_CONFIG(ADDR_SURF_P2) |
3367 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3369 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3370 PIPE_CONFIG(ADDR_SURF_P2) |
3371 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3373 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3374 PIPE_CONFIG(ADDR_SURF_P2) |
3375 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3376 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3377 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3378 PIPE_CONFIG(ADDR_SURF_P2) |
3379 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3380 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3381 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3382 PIPE_CONFIG(ADDR_SURF_P2) |
3383 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3385 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3386 PIPE_CONFIG(ADDR_SURF_P2) |
3387 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3388 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3389 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3390 PIPE_CONFIG(ADDR_SURF_P2) |
3391 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3392 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3393 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3394 PIPE_CONFIG(ADDR_SURF_P2) |
3395 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3396 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3397 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3398 PIPE_CONFIG(ADDR_SURF_P2) |
3399 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3400 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3401 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3402 PIPE_CONFIG(ADDR_SURF_P2) |
3403 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3404 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3405 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3406 PIPE_CONFIG(ADDR_SURF_P2) |
3407 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3408 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3410 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3413 NUM_BANKS(ADDR_SURF_8_BANK));
3414 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417 NUM_BANKS(ADDR_SURF_8_BANK));
3418 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3419 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3420 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3421 NUM_BANKS(ADDR_SURF_8_BANK));
3422 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3423 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3424 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3425 NUM_BANKS(ADDR_SURF_8_BANK));
3426 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3427 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3428 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3429 NUM_BANKS(ADDR_SURF_8_BANK));
3430 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3432 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3433 NUM_BANKS(ADDR_SURF_8_BANK));
3434 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3435 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3436 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3437 NUM_BANKS(ADDR_SURF_8_BANK));
3438 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3439 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3440 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3441 NUM_BANKS(ADDR_SURF_16_BANK));
3442 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3443 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3444 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3445 NUM_BANKS(ADDR_SURF_16_BANK));
3446 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3449 NUM_BANKS(ADDR_SURF_16_BANK));
3450 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3451 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3452 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3453 NUM_BANKS(ADDR_SURF_16_BANK));
3454 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3455 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3456 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3457 NUM_BANKS(ADDR_SURF_16_BANK));
3458 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3461 NUM_BANKS(ADDR_SURF_16_BANK));
3462 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3465 NUM_BANKS(ADDR_SURF_8_BANK));
3467 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3468 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3469 reg_offset != 23)
3470 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3472 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3473 if (reg_offset != 7)
3474 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
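/*
 * gfx_v8_0_select_se_sh - steer indexed GRBM register accesses
 *
 * Writing GRBM_GFX_INDEX selects which shader engine (SE), shader array
 * (SH) and instance subsequent reads/writes of the banked GRBM registers
 * hit; 0xffffffff for any argument selects broadcast at that level.
 * Callers hold grbm_idx_mutex and restore full broadcast when done.
 */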
3480 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3481 u32 se_num, u32 sh_num, u32 instance)
3482 {
3483 u32 data;
3485 if (instance == 0xffffffff)
3486 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3487 else
3488 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3490 if (se_num == 0xffffffff)
3491 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3492 else
3493 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3495 if (sh_num == 0xffffffff)
3496 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3497 else
3498 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3500 WREG32(mmGRBM_GFX_INDEX, data);
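/*
 * One bit per render backend on the currently selected SE/SH:
 * CC_RB_BACKEND_DISABLE holds the fuse-disabled RBs and
 * GC_USER_RB_BACKEND_DISABLE the user-disabled ones, so the active
 * bitmap is the complement of their union, clipped to the RB count.
 */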
3503 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3504 {
3505 u32 data, mask;
3507 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3508 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3510 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3512 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3513 adev->gfx.config.max_sh_per_se);
3515 return (~data) & mask;
3519 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3521 switch (adev->asic_type) {
3522 case CHIP_FIJI:
3523 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3524 RB_XSEL2(1) | PKR_MAP(2) |
3525 PKR_XSEL(1) | PKR_YSEL(1) |
3526 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3527 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3528 SE_PAIR_YSEL(2);
3529 break;
3530 case CHIP_TONGA:
3531 case CHIP_POLARIS10:
3532 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3533 SE_XSEL(1) | SE_YSEL(1);
3534 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3535 SE_PAIR_YSEL(2);
3536 break;
3537 case CHIP_CARRIZO:
3538 case CHIP_STONEY:
3539 *rconf |= RB_MAP_PKR0(2);
3540 *rconf1 |= 0x0;
3541 break;
3542 case CHIP_POLARIS11:
3543 case CHIP_POLARIS12:
3544 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3545 SE_XSEL(1) | SE_YSEL(1);
3546 *rconf1 |= 0x0;
3547 break;
3552 default:
3553 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3554 break;
3555 }
3556 }
3559 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3560 u32 raster_config, u32 raster_config_1,
3561 unsigned rb_mask, unsigned num_rb)
3563 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3564 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3565 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3566 unsigned rb_per_se = num_rb / num_se;
3567 unsigned se_mask[4];
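/* se_mask[i] is the slice of the global RB mask owned by shader engine i */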
3570 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3571 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3572 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3573 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3575 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3576 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3577 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3579 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3580 (!se_mask[2] && !se_mask[3]))) {
3581 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3583 if (!se_mask[0] && !se_mask[1]) {
3584 raster_config_1 |=
3585 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3586 } else {
3587 raster_config_1 |=
3588 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3589 }
3590 }
3592 for (se = 0; se < num_se; se++) {
3593 unsigned raster_config_se = raster_config;
3594 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3595 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3596 int idx = (se / 2) * 2;
3598 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3599 raster_config_se &= ~SE_MAP_MASK;
3601 if (!se_mask[idx]) {
3602 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3603 } else {
3604 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3605 }
3606 }
3608 pkr0_mask &= rb_mask;
3609 pkr1_mask &= rb_mask;
3610 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3611 raster_config_se &= ~PKR_MAP_MASK;
3612 if (!pkr0_mask)
3614 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3615 else
3616 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3617 }
3620 if (rb_per_se >= 2) {
3621 unsigned rb0_mask = 1 << (se * rb_per_se);
3622 unsigned rb1_mask = rb0_mask << 1;
3624 rb0_mask &= rb_mask;
3625 rb1_mask &= rb_mask;
3626 if (!rb0_mask || !rb1_mask) {
3627 raster_config_se &= ~RB_MAP_PKR0_MASK;
3629 if (!rb0_mask)
3630 raster_config_se |=
3631 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3632 else
3633 raster_config_se |=
3634 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3635 }
3638 if (rb_per_se > 2) {
3639 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3640 rb1_mask = rb0_mask << 1;
3641 rb0_mask &= rb_mask;
3642 rb1_mask &= rb_mask;
3643 if (!rb0_mask || !rb1_mask) {
3644 raster_config_se &= ~RB_MAP_PKR1_MASK;
3646 if (!rb0_mask)
3647 raster_config_se |=
3648 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3649 else
3650 raster_config_se |=
3651 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3652 }
3653 }
3654 }
3657 /* GRBM_GFX_INDEX has a different offset on VI */
3658 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3659 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3660 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3663 /* GRBM_GFX_INDEX has a different offset on VI */
3664 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3667 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3668 {
3669 int i, j;
3670 u32 data;
3671 u32 raster_config = 0, raster_config_1 = 0;
3672 u32 active_rbs = 0;
3673 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3674 adev->gfx.config.max_sh_per_se;
3675 unsigned num_rb_pipes;
3677 mutex_lock(&adev->grbm_idx_mutex);
3678 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3679 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3680 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3681 data = gfx_v8_0_get_rb_active_bitmap(adev);
3682 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3683 rb_bitmap_width_per_sh);
3686 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3688 adev->gfx.config.backend_enable_mask = active_rbs;
3689 adev->gfx.config.num_rbs = hweight32(active_rbs);
3691 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3692 adev->gfx.config.max_shader_engines, 16);
3694 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3696 if (!adev->gfx.config.backend_enable_mask ||
3697 adev->gfx.config.num_rbs >= num_rb_pipes) {
3698 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3699 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3700 } else {
3701 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3702 adev->gfx.config.backend_enable_mask,
3703 adev->gfx.config.num_rbs);
3704 }
3706 /* cache the values for userspace */
3707 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3708 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3709 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3710 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3711 RREG32(mmCC_RB_BACKEND_DISABLE);
3712 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3713 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3714 adev->gfx.config.rb_config[i][j].raster_config =
3715 RREG32(mmPA_SC_RASTER_CONFIG);
3716 adev->gfx.config.rb_config[i][j].raster_config_1 =
3717 RREG32(mmPA_SC_RASTER_CONFIG_1);
3720 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3721 mutex_unlock(&adev->grbm_idx_mutex);
3725 * gfx_v8_0_init_compute_vmid - initialize the compute VMID SH_MEM registers
3727 * @adev: amdgpu_device pointer
3729 * Initialize compute vmid sh_mem registers
3732 #define DEFAULT_SH_MEM_BASES (0x6000)
3733 #define FIRST_COMPUTE_VMID (8)
3734 #define LAST_COMPUTE_VMID (16)
3735 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3736 {
3737 int i;
3738 uint32_t sh_mem_config;
3739 uint32_t sh_mem_bases;
3742 * Configure apertures:
3743 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3744 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3745 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3746 */
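/*
 * SH_MEM_BASES packs two 16-bit aperture bases: the private (scratch)
 * base in the low half and the shared (LDS) base in the high half.
 * Each value supplies bits 63:48 of the 64-bit GPU VA, so 0x6000 maps
 * to 0x6000'0000'0000'0000 as described above.
 */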
3747 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3749 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3750 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3751 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3752 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3753 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3754 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3756 mutex_lock(&adev->srbm_mutex);
3757 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3758 vi_srbm_select(adev, 0, 0, 0, i);
3759 /* CP and shaders */
3760 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3761 WREG32(mmSH_MEM_APE1_BASE, 1);
3762 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3763 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3765 vi_srbm_select(adev, 0, 0, 0, 0);
3766 mutex_unlock(&adev->srbm_mutex);
3769 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3771 switch (adev->asic_type) {
3772 default:
3773 adev->gfx.config.double_offchip_lds_buf = 1;
3774 break;
3775 case CHIP_CARRIZO:
3776 case CHIP_STONEY:
3777 adev->gfx.config.double_offchip_lds_buf = 0;
3778 break;
3779 }
3780 }
3782 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3784 u32 tmp, sh_static_mem_cfg;
3787 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3788 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3789 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3790 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3792 gfx_v8_0_tiling_mode_table_init(adev);
3793 gfx_v8_0_setup_rb(adev);
3794 gfx_v8_0_get_cu_info(adev);
3795 gfx_v8_0_config_init(adev);
3797 /* XXX SH_MEM regs */
3798 /* where to put LDS, scratch, GPUVM in FSA64 space */
3799 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3801 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3803 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3805 mutex_lock(&adev->srbm_mutex);
3806 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3807 vi_srbm_select(adev, 0, 0, 0, i);
3808 /* CP and shaders */
3809 if (i == 0) {
3810 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3811 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3812 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3813 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3814 WREG32(mmSH_MEM_CONFIG, tmp);
3815 WREG32(mmSH_MEM_BASES, 0);
3816 } else {
3817 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3818 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3819 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3820 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3821 WREG32(mmSH_MEM_CONFIG, tmp);
3822 tmp = adev->mc.shared_aperture_start >> 48;
3823 WREG32(mmSH_MEM_BASES, tmp);
3824 }
3826 WREG32(mmSH_MEM_APE1_BASE, 1);
3827 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3828 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3830 vi_srbm_select(adev, 0, 0, 0, 0);
3831 mutex_unlock(&adev->srbm_mutex);
3833 gfx_v8_0_init_compute_vmid(adev);
3835 mutex_lock(&adev->grbm_idx_mutex);
3837 * making sure that the following register writes will be broadcast
3838 * to all the shaders
3840 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3842 WREG32(mmPA_SC_FIFO_SIZE,
3843 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3844 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3845 (adev->gfx.config.sc_prim_fifo_size_backend <<
3846 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3847 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3848 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3849 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3850 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3852 tmp = RREG32(mmSPI_ARB_PRIORITY);
3853 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3854 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3855 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3856 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3857 WREG32(mmSPI_ARB_PRIORITY, tmp);
3859 mutex_unlock(&adev->grbm_idx_mutex);
3863 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3864 {
3865 int i, j, k;
3866 u32 mask;
3868 mutex_lock(&adev->grbm_idx_mutex);
3869 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3870 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3871 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3872 for (k = 0; k < adev->usec_timeout; k++) {
3873 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3874 break;
3875 udelay(1);
3876 }
3877 }
3878 }
3879 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3880 mutex_unlock(&adev->grbm_idx_mutex);
3882 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3883 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3884 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3885 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3886 for (k = 0; k < adev->usec_timeout; k++) {
3887 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3888 break;
3889 udelay(1);
3890 }
3891 }
3893 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3896 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3898 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3899 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3900 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3901 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3903 WREG32(mmCP_INT_CNTL_RING0, tmp);
3906 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3909 WREG32(mmRLC_CSIB_ADDR_HI,
3910 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3911 WREG32(mmRLC_CSIB_ADDR_LO,
3912 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3913 WREG32(mmRLC_CSIB_LENGTH,
3914 adev->gfx.rlc.clear_state_size);
3917 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3920 int *unique_indices,
3923 int *ind_start_offsets,
3928 bool new_entry = true;
3930 for (; ind_offset < list_size; ind_offset++) {
3932 if (new_entry) {
3933 new_entry = false;
3934 ind_start_offsets[*offset_count] = ind_offset;
3935 *offset_count = *offset_count + 1;
3936 BUG_ON(*offset_count >= max_offset);
3937 }
3939 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3940 new_entry = true;
3941 continue;
3942 }
3946 /* look for the matching index */
3947 for (indices = 0;
3948 indices < *indices_count;
3949 indices++) {
3950 if (unique_indices[indices] ==
3951 register_list_format[ind_offset])
3952 break;
3953 }
3955 if (indices >= *indices_count) {
3956 unique_indices[*indices_count] =
3957 register_list_format[ind_offset];
3958 indices = *indices_count;
3959 *indices_count = *indices_count + 1;
3960 BUG_ON(*indices_count >= max_indices);
3963 register_list_format[ind_offset] = indices;
3967 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3970 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3971 int indices_count = 0;
3972 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3973 int offset_count = 0;
3976 unsigned int *register_list_format =
3977 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3978 if (!register_list_format)
3979 return -ENOMEM;
3980 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3981 adev->gfx.rlc.reg_list_format_size_bytes);
3983 gfx_v8_0_parse_ind_reg_list(register_list_format,
3984 RLC_FormatDirectRegListLength,
3985 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3986 unique_indices,
3987 &indices_count,
3988 sizeof(unique_indices) / sizeof(int),
3989 indirect_start_offsets,
3990 &offset_count,
3991 sizeof(indirect_start_offsets)/sizeof(int));
3993 /* save and restore list */
3994 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3996 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3997 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3998 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4001 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4002 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4003 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
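/*
 * The restore list is a sequence of (register, value) pairs, so the
 * length handed to the RLC below is half the raw dword count.
 */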
4005 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4006 list_size = list_size >> 1;
4007 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4008 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4010 /* write the starting offsets */
4011 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4012 adev->gfx.rlc.starting_offsets_start);
4013 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
4014 WREG32(mmRLC_GPM_SCRATCH_DATA,
4015 indirect_start_offsets[i]);
4017 /* unique indices */
4018 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4019 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4020 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
4021 if (unique_indices[i] != 0) {
4022 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4023 WREG32(data + i, unique_indices[i] >> 20);
4026 kfree(register_list_format);
4028 return 0;
4029 }
4031 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4033 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4036 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4040 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4042 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4043 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4044 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4045 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4046 WREG32(mmRLC_PG_DELAY, data);
4048 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4049 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4053 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4056 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4059 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4062 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
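/* note the inverted sense below: CP_PG_DISABLE=0 means CP power gating is enabled */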
4065 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4067 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4070 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4072 if ((adev->asic_type == CHIP_CARRIZO) ||
4073 (adev->asic_type == CHIP_STONEY)) {
4074 gfx_v8_0_init_csb(adev);
4075 gfx_v8_0_init_save_restore_list(adev);
4076 gfx_v8_0_enable_save_restore_machine(adev);
4077 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4078 gfx_v8_0_init_power_gating(adev);
4079 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4080 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4081 (adev->asic_type == CHIP_POLARIS12)) {
4082 gfx_v8_0_init_csb(adev);
4083 gfx_v8_0_init_save_restore_list(adev);
4084 gfx_v8_0_enable_save_restore_machine(adev);
4085 gfx_v8_0_init_power_gating(adev);
4090 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4092 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4094 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4095 gfx_v8_0_wait_for_rlc_serdes(adev);
4098 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4100 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4103 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4107 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4109 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4111 /* APUs such as Carrizo enable the CP interrupt only after the CP is initialized */
4112 if (!(adev->flags & AMD_IS_APU))
4113 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4118 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4120 const struct rlc_firmware_header_v2_0 *hdr;
4121 const __le32 *fw_data;
4122 unsigned i, fw_size;
4124 if (!adev->gfx.rlc_fw)
4125 return -EINVAL;
4127 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4128 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4130 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4131 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4132 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
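/*
 * Standard CP/RLC ucode load sequence: reset the write pointer via the
 * ADDR register, stream the words through the auto-incrementing DATA
 * register, then leave the firmware version in the ADDR register.  The
 * PFP/CE/ME and MEC loads below follow the same pattern.
 */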
4134 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4135 for (i = 0; i < fw_size; i++)
4136 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4137 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4142 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4147 gfx_v8_0_rlc_stop(adev);
4150 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4151 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4152 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4153 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4154 if (adev->asic_type == CHIP_POLARIS11 ||
4155 adev->asic_type == CHIP_POLARIS10 ||
4156 adev->asic_type == CHIP_POLARIS12) {
4157 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4158 tmp &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4159 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4163 WREG32(mmRLC_PG_CNTL, 0);
4165 gfx_v8_0_rlc_reset(adev);
4166 gfx_v8_0_init_pg(adev);
4168 if (!adev->pp_enabled) {
4169 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4170 /* legacy rlc firmware loading */
4171 r = gfx_v8_0_rlc_load_microcode(adev);
4172 if (r)
4173 return r;
4174 } else {
4175 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4176 AMDGPU_UCODE_ID_RLC_G);
4182 gfx_v8_0_rlc_start(adev);
4187 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4190 u32 tmp = RREG32(mmCP_ME_CNTL);
4192 if (enable) {
4193 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4194 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4195 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4196 } else {
4197 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4198 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4199 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4200 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4201 adev->gfx.gfx_ring[i].ready = false;
4202 }
4203 WREG32(mmCP_ME_CNTL, tmp);
4204 udelay(50);
4205 }
4207 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4209 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4210 const struct gfx_firmware_header_v1_0 *ce_hdr;
4211 const struct gfx_firmware_header_v1_0 *me_hdr;
4212 const __le32 *fw_data;
4213 unsigned i, fw_size;
4215 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4216 return -EINVAL;
4218 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4219 adev->gfx.pfp_fw->data;
4220 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4221 adev->gfx.ce_fw->data;
4222 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4223 adev->gfx.me_fw->data;
4225 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4226 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4227 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4229 gfx_v8_0_cp_gfx_enable(adev, false);
4232 fw_data = (const __le32 *)
4233 (adev->gfx.pfp_fw->data +
4234 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4235 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4236 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4237 for (i = 0; i < fw_size; i++)
4238 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4239 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4242 fw_data = (const __le32 *)
4243 (adev->gfx.ce_fw->data +
4244 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4245 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4246 WREG32(mmCP_CE_UCODE_ADDR, 0);
4247 for (i = 0; i < fw_size; i++)
4248 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4249 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4252 fw_data = (const __le32 *)
4253 (adev->gfx.me_fw->data +
4254 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4255 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4256 WREG32(mmCP_ME_RAM_WADDR, 0);
4257 for (i = 0; i < fw_size; i++)
4258 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4259 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4264 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4265 {
4266 u32 count = 0;
4267 const struct cs_section_def *sect = NULL;
4268 const struct cs_extent_def *ext = NULL;
4270 /* begin clear state */
4271 count += 2;
4272 /* context control state */
4273 count += 3;
4275 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4276 for (ext = sect->section; ext->extent != NULL; ++ext) {
4277 if (sect->id == SECT_CONTEXT)
4278 count += 2 + ext->reg_count;
4279 else
4280 return 0;
4281 }
4282 }
4283 /* pa_sc_raster_config/pa_sc_raster_config1 */
4284 count += 4;
4285 /* end clear state */
4286 count += 2;
4287 /* clear state */
4288 count += 2;
4290 return count;
4291 }
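/*
 * This count must match the packet stream emitted by
 * gfx_v8_0_cp_gfx_start() below; the ring there is allocated with
 * exactly get_csb_size() + 4 dwords (the +4 covers the trailing
 * SET_BASE group).
 */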
4293 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4295 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4296 const struct cs_section_def *sect = NULL;
4297 const struct cs_extent_def *ext = NULL;
4301 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4302 WREG32(mmCP_ENDIAN_SWAP, 0);
4303 WREG32(mmCP_DEVICE_ID, 1);
4305 gfx_v8_0_cp_gfx_enable(adev, true);
4307 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4309 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4313 /* clear state buffer */
4314 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4315 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4317 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4318 amdgpu_ring_write(ring, 0x80000000);
4319 amdgpu_ring_write(ring, 0x80000000);
4321 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4322 for (ext = sect->section; ext->extent != NULL; ++ext) {
4323 if (sect->id == SECT_CONTEXT) {
4324 amdgpu_ring_write(ring,
4325 PACKET3(PACKET3_SET_CONTEXT_REG,
4327 amdgpu_ring_write(ring,
4328 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4329 for (i = 0; i < ext->reg_count; i++)
4330 amdgpu_ring_write(ring, ext->extent[i]);
4335 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4336 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4337 switch (adev->asic_type) {
4339 case CHIP_POLARIS10:
4340 amdgpu_ring_write(ring, 0x16000012);
4341 amdgpu_ring_write(ring, 0x0000002A);
4343 case CHIP_POLARIS11:
4344 case CHIP_POLARIS12:
4345 amdgpu_ring_write(ring, 0x16000012);
4346 amdgpu_ring_write(ring, 0x00000000);
4349 amdgpu_ring_write(ring, 0x3a00161a);
4350 amdgpu_ring_write(ring, 0x0000002e);
4353 amdgpu_ring_write(ring, 0x00000002);
4354 amdgpu_ring_write(ring, 0x00000000);
4357 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4358 0x00000000 : 0x00000002);
4359 amdgpu_ring_write(ring, 0x00000000);
4362 amdgpu_ring_write(ring, 0x00000000);
4363 amdgpu_ring_write(ring, 0x00000000);
4369 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4370 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4372 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4373 amdgpu_ring_write(ring, 0);
4375 /* init the CE partitions */
4376 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4377 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4378 amdgpu_ring_write(ring, 0x8000);
4379 amdgpu_ring_write(ring, 0x8000);
4381 amdgpu_ring_commit(ring);
4385 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4388 /* no gfx doorbells on iceland */
4389 if (adev->asic_type == CHIP_TOPAZ)
4390 return;
4392 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4394 if (ring->use_doorbell) {
4395 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4396 DOORBELL_OFFSET, ring->doorbell_index);
4397 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4399 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4402 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4405 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4407 if (adev->flags & AMD_IS_APU)
4408 return;
4410 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4411 DOORBELL_RANGE_LOWER,
4412 AMDGPU_DOORBELL_GFX_RING0);
4413 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4415 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4416 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4419 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4421 struct amdgpu_ring *ring;
4424 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4427 /* Set the write pointer delay */
4428 WREG32(mmCP_RB_WPTR_DELAY, 0);
4430 /* set the RB to use vmid 0 */
4431 WREG32(mmCP_RB_VMID, 0);
4433 /* Set ring buffer size */
4434 ring = &adev->gfx.gfx_ring[0];
4435 rb_bufsz = order_base_2(ring->ring_size / 8);
4436 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4437 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4438 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4439 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4441 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4443 WREG32(mmCP_RB0_CNTL, tmp);
4445 /* Initialize the ring buffer's read and write pointers */
4446 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4448 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4450 /* set the wb address whether it's enabled or not */
4451 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4452 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4453 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4455 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4456 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4457 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4459 WREG32(mmCP_RB0_CNTL, tmp);
4461 rb_addr = ring->gpu_addr >> 8;
4462 WREG32(mmCP_RB0_BASE, rb_addr);
4463 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4465 gfx_v8_0_set_cpg_door_bell(adev, ring);
4466 /* start the ring */
4467 amdgpu_ring_clear_ring(ring);
4468 gfx_v8_0_cp_gfx_start(adev);
4470 r = amdgpu_ring_test_ring(ring);
4472 ring->ready = false;
4477 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4481 if (enable) {
4482 WREG32(mmCP_MEC_CNTL, 0);
4483 } else {
4484 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4485 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4486 adev->gfx.compute_ring[i].ready = false;
4487 adev->gfx.kiq.ring.ready = false;
4488 }
4489 udelay(50);
4490 }
4492 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4494 const struct gfx_firmware_header_v1_0 *mec_hdr;
4495 const __le32 *fw_data;
4496 unsigned i, fw_size;
4498 if (!adev->gfx.mec_fw)
4499 return -EINVAL;
4501 gfx_v8_0_cp_compute_enable(adev, false);
4503 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4504 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4506 fw_data = (const __le32 *)
4507 (adev->gfx.mec_fw->data +
4508 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4509 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4512 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4513 for (i = 0; i < fw_size; i++)
4514 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4515 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4517 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4518 if (adev->gfx.mec2_fw) {
4519 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4521 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4522 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4524 fw_data = (const __le32 *)
4525 (adev->gfx.mec2_fw->data +
4526 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4527 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4529 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4530 for (i = 0; i < fw_size; i++)
4531 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4532 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4539 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4542 struct amdgpu_device *adev = ring->adev;
4544 /* tell the RLC which queue is the KIQ */
4545 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4546 tmp &= 0xffffff00;
4547 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4548 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4549 tmp |= 0x80;
4550 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4553 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4555 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4556 uint32_t scratch, tmp = 0;
4557 uint64_t queue_mask = 0;
4560 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4561 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4564 /* This situation may be hit in the future if a new HW
4565 * generation exposes more than 64 queues. If so, the
4566 * definition of queue_mask needs updating */
4567 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4568 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4572 queue_mask |= (1ull << i);
4575 r = amdgpu_gfx_scratch_get(adev, &scratch);
4577 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4580 WREG32(scratch, 0xCAFEDEAD);
4582 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4584 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4585 amdgpu_gfx_scratch_free(adev, scratch);
4589 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4590 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4591 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4592 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4593 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4594 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4595 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4596 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
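/*
 * SET_RESOURCES above hands the global compute queue mask to the KIQ;
 * each compute ring is then registered with its own MAP_QUEUES packet
 * carrying the doorbell offset, the MQD address and the wptr polling
 * address.
 */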
4597 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4598 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4599 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4600 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4603 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4604 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
4605 amdgpu_ring_write(kiq_ring,
4606 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4607 amdgpu_ring_write(kiq_ring,
4608 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4609 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4610 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4611 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4612 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4613 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4614 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4615 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4617 /* write to scratch for completion */
4618 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4619 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4620 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4621 amdgpu_ring_commit(kiq_ring);
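/*
 * The trailing SET_UCONFIG_REG write acts as a fence: the KIQ writes
 * 0xDEADBEEF to the scratch register only after it has consumed the
 * MAP_QUEUES packets, which is what the polling loop below waits for.
 */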
4623 for (i = 0; i < adev->usec_timeout; i++) {
4624 tmp = RREG32(scratch);
4625 if (tmp == 0xDEADBEEF)
4626 break;
4627 DRM_UDELAY(1);
4628 }
4629 if (i >= adev->usec_timeout) {
4630 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4634 amdgpu_gfx_scratch_free(adev, scratch);
4639 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4643 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4644 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4645 for (i = 0; i < adev->usec_timeout; i++) {
4646 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4647 break;
4648 udelay(1);
4649 }
4650 if (i == adev->usec_timeout)
4651 r = -ETIMEDOUT;
4652 }
4653 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4654 WREG32(mmCP_HQD_PQ_RPTR, 0);
4655 WREG32(mmCP_HQD_PQ_WPTR, 0);
4660 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4662 struct amdgpu_device *adev = ring->adev;
4663 struct vi_mqd *mqd = ring->mqd_ptr;
4664 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4667 mqd->header = 0xC0310800;
4668 mqd->compute_pipelinestat_enable = 0x00000001;
4669 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4670 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4671 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4672 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
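/* 0xffffffff: every CU of each shader engine may execute this queue's waves */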
4673 mqd->compute_misc_reserved = 0x00000003;
4674 if (!(adev->flags & AMD_IS_APU)) {
4675 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4676 + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
4677 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4678 + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
4680 eop_base_addr = ring->eop_gpu_addr >> 8;
4681 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4682 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4684 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4685 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4686 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4687 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4689 mqd->cp_hqd_eop_control = tmp;
4691 /* enable doorbell? */
4692 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4693 CP_HQD_PQ_DOORBELL_CONTROL,
4695 ring->use_doorbell ? 1 : 0);
4697 mqd->cp_hqd_pq_doorbell_control = tmp;
4699 /* set the pointer to the MQD */
4700 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4701 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4703 /* set MQD vmid to 0 */
4704 tmp = RREG32(mmCP_MQD_CONTROL);
4705 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4706 mqd->cp_mqd_control = tmp;
4708 /* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
4709 hqd_gpu_addr = ring->gpu_addr >> 8;
4710 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4711 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4713 /* set up the HQD, this is similar to CP_RB0_CNTL */
4714 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4715 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4716 (order_base_2(ring->ring_size / 4) - 1));
4717 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4718 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4720 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4722 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4723 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4724 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4725 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4726 mqd->cp_hqd_pq_control = tmp;
4728 /* set the wb address whether it's enabled or not */
4729 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4730 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4731 mqd->cp_hqd_pq_rptr_report_addr_hi =
4732 upper_32_bits(wb_gpu_addr) & 0xffff;
4734 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4735 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4736 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4737 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4740 /* enable the doorbell if requested */
4741 if (ring->use_doorbell) {
4742 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4743 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4744 DOORBELL_OFFSET, ring->doorbell_index);
4746 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4748 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4749 DOORBELL_SOURCE, 0);
4750 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4754 mqd->cp_hqd_pq_doorbell_control = tmp;
4756 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4757 ring->wptr = 0;
4758 mqd->cp_hqd_pq_wptr = ring->wptr;
4759 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4761 /* set the vmid for the queue */
4762 mqd->cp_hqd_vmid = 0;
4764 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4765 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4766 mqd->cp_hqd_persistent_state = tmp;
4768 /* set MTYPE */
4769 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4770 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4771 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4772 mqd->cp_hqd_ib_control = tmp;
4774 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4775 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4776 mqd->cp_hqd_iq_timer = tmp;
4778 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4779 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4780 mqd->cp_hqd_ctx_save_control = tmp;
4782 /* defaults */
4783 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4784 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4785 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4786 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4787 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4788 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4789 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4790 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4791 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4792 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4793 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4794 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4795 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4796 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4797 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4799 /* activate the queue */
4800 mqd->cp_hqd_active = 1;
4802 return 0;
4803 }
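/* Write a prepared MQD image into the currently selected HQD. A minimal
 * usage sketch, matching the init-queue callers below (the target queue
 * must be selected under srbm_mutex first):
 *
 *	mutex_lock(&adev->srbm_mutex);
 *	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 *	gfx_v8_0_mqd_commit(adev, mqd);
 *	vi_srbm_select(adev, 0, 0, 0, 0);
 *	mutex_unlock(&adev->srbm_mutex);
 */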
4805 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4806 struct vi_mqd *mqd)
4807 {
4808 uint32_t mqd_reg;
4809 uint32_t *mqd_data;
4811 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4812 mqd_data = &mqd->cp_mqd_base_addr_lo;
4814 /* disable wptr polling */
4815 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4817 /* program all HQD registers */
4818 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4819 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4821 /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4822 * This is safe since EOP RPTR==WPTR for any inactive HQD
4823 * on ASICs that do not support context-save.
4824 * EOP writes/reads can start anywhere in the ring.
4825 */
4826 if (adev->asic_type != CHIP_TONGA) {
4827 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4828 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4829 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4830 }
4832 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4833 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4835 /* activate the HQD */
4836 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4837 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4839 return 0;
4840 }
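/* Bring up the Kernel Interface Queue (KIQ). On first init a fresh MQD is
 * generated and a backup copy saved; on GPU reset the backup is restored
 * and re-committed instead, so the queue comes back with its prior state.
 */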
4842 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4843 {
4844 struct amdgpu_device *adev = ring->adev;
4845 struct vi_mqd *mqd = ring->mqd_ptr;
4846 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4848 gfx_v8_0_kiq_setting(ring);
4850 if (adev->gfx.in_reset) { /* for GPU_RESET case */
4851 /* reset MQD to a clean status */
4852 if (adev->gfx.mec.mqd_backup[mqd_idx])
4853 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4855 /* reset ring buffer */
4856 ring->wptr = 0;
4857 amdgpu_ring_clear_ring(ring);
4858 mutex_lock(&adev->srbm_mutex);
4859 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4860 gfx_v8_0_mqd_commit(adev, mqd);
4861 vi_srbm_select(adev, 0, 0, 0, 0);
4862 mutex_unlock(&adev->srbm_mutex);
4863 } else {
4864 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4865 ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
4866 ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
4867 mutex_lock(&adev->srbm_mutex);
4868 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4869 gfx_v8_0_mqd_init(ring);
4870 gfx_v8_0_mqd_commit(adev, mqd);
4871 vi_srbm_select(adev, 0, 0, 0, 0);
4872 mutex_unlock(&adev->srbm_mutex);
4874 if (adev->gfx.mec.mqd_backup[mqd_idx])
4875 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4876 }
4878 return 0;
4879 }
4881 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4882 {
4883 struct amdgpu_device *adev = ring->adev;
4884 struct vi_mqd *mqd = ring->mqd_ptr;
4885 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4887 if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
4888 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4889 ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
4890 ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
4891 mutex_lock(&adev->srbm_mutex);
4892 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4893 gfx_v8_0_mqd_init(ring);
4894 vi_srbm_select(adev, 0, 0, 0, 0);
4895 mutex_unlock(&adev->srbm_mutex);
4897 if (adev->gfx.mec.mqd_backup[mqd_idx])
4898 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4899 } else if (adev->gfx.in_reset) { /* for GPU_RESET case */
4900 /* reset MQD to a clean status */
4901 if (adev->gfx.mec.mqd_backup[mqd_idx])
4902 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4903 /* reset ring buffer */
4904 ring->wptr = 0;
4905 amdgpu_ring_clear_ring(ring);
4906 } else {
4907 amdgpu_ring_clear_ring(ring);
4908 }
4909 return 0;
4910 }
4912 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4913 {
4914 if (adev->asic_type > CHIP_TONGA) {
4915 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4916 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4917 }
4918 /* enable doorbells */
4919 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4920 }
4922 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4923 {
4924 struct amdgpu_ring *ring = NULL;
4925 int r = 0, i;
4927 gfx_v8_0_cp_compute_enable(adev, true);
4929 ring = &adev->gfx.kiq.ring;
4931 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4932 if (unlikely(r != 0))
4933 goto done;
4935 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4936 if (!r) {
4937 r = gfx_v8_0_kiq_init_queue(ring);
4938 amdgpu_bo_kunmap(ring->mqd_obj);
4939 ring->mqd_ptr = NULL;
4940 }
4941 amdgpu_bo_unreserve(ring->mqd_obj);
4942 if (r)
4943 goto done;
4945 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4946 ring = &adev->gfx.compute_ring[i];
4948 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4949 if (unlikely(r != 0))
4950 goto done;
4951 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4952 if (!r) {
4953 r = gfx_v8_0_kcq_init_queue(ring);
4954 amdgpu_bo_kunmap(ring->mqd_obj);
4955 ring->mqd_ptr = NULL;
4956 }
4957 amdgpu_bo_unreserve(ring->mqd_obj);
4958 if (r)
4959 goto done;
4960 }
4962 gfx_v8_0_set_mec_doorbell_range(adev);
4964 r = gfx_v8_0_kiq_kcq_enable(adev);
4965 if (r)
4966 goto done;
4968 /* Test KIQ */
4969 ring = &adev->gfx.kiq.ring;
4970 ring->ready = true;
4971 r = amdgpu_ring_test_ring(ring);
4972 if (r) {
4973 ring->ready = false;
4974 goto done;
4975 }
4977 /* Test KCQs */
4978 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4979 ring = &adev->gfx.compute_ring[i];
4980 ring->ready = true;
4981 r = amdgpu_ring_test_ring(ring);
4982 if (r)
4983 ring->ready = false;
4984 }
4986 done:
4987 return r;
4988 }
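/* Resume the whole Command Processor: load (or, when the SMU owns firmware
 * loading, verify) the CE/PFP/ME/MEC microcode, then restart the GFX ring
 * followed by the KIQ and the compute queues.
 */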
4990 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4991 {
4992 int r;
4994 if (!(adev->flags & AMD_IS_APU))
4995 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4997 if (!adev->pp_enabled) {
4998 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4999 /* legacy firmware loading */
5000 r = gfx_v8_0_cp_gfx_load_microcode(adev);
5001 if (r)
5002 return r;
5004 r = gfx_v8_0_cp_compute_load_microcode(adev);
5005 if (r)
5006 return r;
5007 } else {
5008 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5009 AMDGPU_UCODE_ID_CP_CE);
5010 if (r)
5011 return -EINVAL;
5013 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5014 AMDGPU_UCODE_ID_CP_PFP);
5015 if (r)
5016 return -EINVAL;
5018 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5019 AMDGPU_UCODE_ID_CP_ME);
5020 if (r)
5021 return -EINVAL;
5023 if (adev->asic_type == CHIP_TOPAZ) {
5024 r = gfx_v8_0_cp_compute_load_microcode(adev);
5025 } else {
5028 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5029 AMDGPU_UCODE_ID_CP_MEC1);
5030 }
5031 if (r)
5032 return -EINVAL;
5033 }
5034 }
5036 r = gfx_v8_0_cp_gfx_resume(adev);
5037 if (r)
5038 return r;
5040 r = gfx_v8_0_kiq_resume(adev);
5041 if (r)
5042 return r;
5044 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5046 return 0;
5047 }
5049 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5050 {
5051 gfx_v8_0_cp_gfx_enable(adev, enable);
5052 gfx_v8_0_cp_compute_enable(adev, enable);
5053 }
5055 static int gfx_v8_0_hw_init(void *handle)
5056 {
5057 int r;
5058 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5060 gfx_v8_0_init_golden_registers(adev);
5061 gfx_v8_0_gpu_init(adev);
5063 r = gfx_v8_0_rlc_resume(adev);
5064 if (r)
5065 return r;
5067 r = gfx_v8_0_cp_resume(adev);
5069 return r;
5070 }
5072 static int gfx_v8_0_hw_fini(void *handle)
5073 {
5074 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5076 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5077 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5078 if (amdgpu_sriov_vf(adev)) {
5079 pr_debug("For SRIOV client, shouldn't do anything.\n");
5080 return 0;
5081 }
5082 gfx_v8_0_cp_enable(adev, false);
5083 gfx_v8_0_rlc_stop(adev);
5085 amdgpu_set_powergating_state(adev,
5086 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5088 return 0;
5089 }
5091 static int gfx_v8_0_suspend(void *handle)
5092 {
5093 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5094 adev->gfx.in_suspend = true;
5095 return gfx_v8_0_hw_fini(adev);
5096 }
5098 static int gfx_v8_0_resume(void *handle)
5099 {
5100 int r;
5101 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5103 r = gfx_v8_0_hw_init(adev);
5104 adev->gfx.in_suspend = false;
5105 return r;
5106 }
5108 static bool gfx_v8_0_is_idle(void *handle)
5109 {
5110 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5112 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5113 return false;
5114 else
5115 return true;
5116 }
5118 static int gfx_v8_0_wait_for_idle(void *handle)
5119 {
5120 unsigned i;
5121 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5123 for (i = 0; i < adev->usec_timeout; i++) {
5124 if (gfx_v8_0_is_idle(handle))
5125 return 0;
5127 udelay(1);
5128 }
5129 return -ETIMEDOUT;
5130 }
5132 static bool gfx_v8_0_check_soft_reset(void *handle)
5133 {
5134 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5135 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5136 u32 tmp;
5138 /* GRBM_STATUS */
5139 tmp = RREG32(mmGRBM_STATUS);
5140 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5141 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5142 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5143 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5144 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5145 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5146 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5147 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5148 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5149 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5150 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5151 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5152 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5153 }
5155 /* GRBM_STATUS2 */
5156 tmp = RREG32(mmGRBM_STATUS2);
5157 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5158 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5159 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5161 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5162 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5163 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5164 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5165 SOFT_RESET_CPF, 1);
5166 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5167 SOFT_RESET_CPC, 1);
5168 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5169 SOFT_RESET_CPG, 1);
5170 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5171 SOFT_RESET_GRBM, 1);
5172 }
5174 /* SRBM_STATUS */
5175 tmp = RREG32(mmSRBM_STATUS);
5176 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5177 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5178 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5179 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5180 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5181 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5183 if (grbm_soft_reset || srbm_soft_reset) {
5184 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5185 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5186 return true;
5187 } else {
5188 adev->gfx.grbm_soft_reset = 0;
5189 adev->gfx.srbm_soft_reset = 0;
5190 return false;
5191 }
5192 }
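/* The soft-reset sequence is split across four callbacks: check_soft_reset()
 * latches which GRBM/SRBM blocks look hung, pre_soft_reset() quiesces the
 * RLC, CP and compute HQDs, soft_reset() pulses the reset bits, and
 * post_soft_reset() restarts the engines in reverse order.
 */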
5194 static int gfx_v8_0_pre_soft_reset(void *handle)
5195 {
5196 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5197 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5199 if ((!adev->gfx.grbm_soft_reset) &&
5200 (!adev->gfx.srbm_soft_reset))
5201 return 0;
5203 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5204 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5206 /* stop the rlc */
5207 gfx_v8_0_rlc_stop(adev);
5209 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5210 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5211 /* Disable GFX parsing/prefetching */
5212 gfx_v8_0_cp_gfx_enable(adev, false);
5214 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5215 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5216 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5217 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5218 int i;
5220 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5221 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5223 mutex_lock(&adev->srbm_mutex);
5224 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5225 gfx_v8_0_deactivate_hqd(adev, 2);
5226 vi_srbm_select(adev, 0, 0, 0, 0);
5227 mutex_unlock(&adev->srbm_mutex);
5228 }
5229 /* Disable MEC parsing/prefetching */
5230 gfx_v8_0_cp_compute_enable(adev, false);
5231 }
5233 return 0;
5234 }
5236 static int gfx_v8_0_soft_reset(void *handle)
5237 {
5238 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5239 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5240 u32 tmp;
5242 if ((!adev->gfx.grbm_soft_reset) &&
5243 (!adev->gfx.srbm_soft_reset))
5244 return 0;
5246 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5247 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5249 if (grbm_soft_reset || srbm_soft_reset) {
5250 tmp = RREG32(mmGMCON_DEBUG);
5251 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5252 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5253 WREG32(mmGMCON_DEBUG, tmp);
5254 udelay(50);
5255 }
5257 if (grbm_soft_reset) {
5258 tmp = RREG32(mmGRBM_SOFT_RESET);
5259 tmp |= grbm_soft_reset;
5260 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5261 WREG32(mmGRBM_SOFT_RESET, tmp);
5262 tmp = RREG32(mmGRBM_SOFT_RESET);
5264 udelay(50);
5266 tmp &= ~grbm_soft_reset;
5267 WREG32(mmGRBM_SOFT_RESET, tmp);
5268 tmp = RREG32(mmGRBM_SOFT_RESET);
5269 }
5271 if (srbm_soft_reset) {
5272 tmp = RREG32(mmSRBM_SOFT_RESET);
5273 tmp |= srbm_soft_reset;
5274 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5275 WREG32(mmSRBM_SOFT_RESET, tmp);
5276 tmp = RREG32(mmSRBM_SOFT_RESET);
5278 udelay(50);
5280 tmp &= ~srbm_soft_reset;
5281 WREG32(mmSRBM_SOFT_RESET, tmp);
5282 tmp = RREG32(mmSRBM_SOFT_RESET);
5283 }
5285 if (grbm_soft_reset || srbm_soft_reset) {
5286 tmp = RREG32(mmGMCON_DEBUG);
5287 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5288 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5289 WREG32(mmGMCON_DEBUG, tmp);
5290 }
5292 /* Wait a little for things to settle down */
5293 udelay(50);
5295 return 0;
5296 }
5298 static int gfx_v8_0_post_soft_reset(void *handle)
5299 {
5300 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5301 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5303 if ((!adev->gfx.grbm_soft_reset) &&
5304 (!adev->gfx.srbm_soft_reset))
5305 return 0;
5307 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5308 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5310 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5311 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5312 gfx_v8_0_cp_gfx_resume(adev);
5314 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5315 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5316 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5317 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5318 int i;
5320 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5321 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5323 mutex_lock(&adev->srbm_mutex);
5324 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5325 gfx_v8_0_deactivate_hqd(adev, 2);
5326 vi_srbm_select(adev, 0, 0, 0, 0);
5327 mutex_unlock(&adev->srbm_mutex);
5328 }
5329 gfx_v8_0_kiq_resume(adev);
5330 }
5331 gfx_v8_0_rlc_start(adev);
5333 return 0;
5334 }
5336 /**
5337 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5338 *
5339 * @adev: amdgpu_device pointer
5340 *
5341 * Fetches a GPU clock counter snapshot.
5342 * Returns the 64 bit clock counter snapshot.
5343 */
5344 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5345 {
5346 uint64_t clock;
5348 mutex_lock(&adev->gfx.gpu_clock_mutex);
5349 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5350 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5351 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5352 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5353 return clock;
5354 }
5356 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5357 uint32_t vmid,
5358 uint32_t gds_base, uint32_t gds_size,
5359 uint32_t gws_base, uint32_t gws_size,
5360 uint32_t oa_base, uint32_t oa_size)
5361 {
5362 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5363 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5365 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5366 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5368 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5369 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5371 /* GDS Base */
5372 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5373 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5374 WRITE_DATA_DST_SEL(0)));
5375 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5376 amdgpu_ring_write(ring, 0);
5377 amdgpu_ring_write(ring, gds_base);
5379 /* GDS Size */
5380 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5381 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5382 WRITE_DATA_DST_SEL(0)));
5383 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5384 amdgpu_ring_write(ring, 0);
5385 amdgpu_ring_write(ring, gds_size);
5387 /* GWS */
5388 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5389 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5390 WRITE_DATA_DST_SEL(0)));
5391 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5392 amdgpu_ring_write(ring, 0);
5393 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5395 /* OA */
5396 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5397 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5398 WRITE_DATA_DST_SEL(0)));
5399 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5400 amdgpu_ring_write(ring, 0);
5401 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5402 }
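/* Read one wave-state register through the SQ indirect index/data pair:
 * SQ_IND_INDEX selects the SIMD, wave and register offset, and the value
 * comes back through SQ_IND_DATA. Used by the wave debug dump paths below.
 */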
5404 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5405 {
5406 WREG32(mmSQ_IND_INDEX,
5407 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5408 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5409 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5410 (SQ_IND_INDEX__FORCE_READ_MASK));
5411 return RREG32(mmSQ_IND_DATA);
5412 }
5414 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5415 uint32_t wave, uint32_t thread,
5416 uint32_t regno, uint32_t num, uint32_t *out)
5417 {
5418 WREG32(mmSQ_IND_INDEX,
5419 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5420 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5421 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5422 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5423 (SQ_IND_INDEX__FORCE_READ_MASK) |
5424 (SQ_IND_INDEX__AUTO_INCR_MASK));
5425 while (num--)
5426 *(out++) = RREG32(mmSQ_IND_DATA);
5427 }
5429 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5430 {
5431 /* type 0 wave data */
5432 dst[(*no_fields)++] = 0;
5433 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5434 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5435 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5436 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5437 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5438 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5439 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5440 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5441 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5442 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5443 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5444 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5445 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5446 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5447 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5448 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5449 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5450 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5451 }
5453 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5454 uint32_t wave, uint32_t start,
5455 uint32_t size, uint32_t *dst)
5456 {
5457 wave_read_regs(
5458 adev, simd, wave, 0,
5459 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5460 }
5463 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5464 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5465 .select_se_sh = &gfx_v8_0_select_se_sh,
5466 .read_wave_data = &gfx_v8_0_read_wave_data,
5467 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5468 };
5470 static int gfx_v8_0_early_init(void *handle)
5471 {
5472 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5474 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5475 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5476 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5477 gfx_v8_0_set_ring_funcs(adev);
5478 gfx_v8_0_set_irq_funcs(adev);
5479 gfx_v8_0_set_gds_init(adev);
5480 gfx_v8_0_set_rlc_funcs(adev);
5482 return 0;
5483 }
5485 static int gfx_v8_0_late_init(void *handle)
5486 {
5487 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5488 int r;
5490 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5491 if (r)
5492 return r;
5494 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5495 if (r)
5496 return r;
5498 /* requires IBs so do in late init after IB pool is initialized */
5499 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5500 if (r)
5501 return r;
5503 amdgpu_set_powergating_state(adev,
5504 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5506 return 0;
5507 }
5509 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5510 bool enable)
5511 {
5512 if ((adev->asic_type == CHIP_POLARIS11) ||
5513 (adev->asic_type == CHIP_POLARIS12))
5514 /* Send msg to SMU via Powerplay */
5515 amdgpu_set_powergating_state(adev,
5516 AMD_IP_BLOCK_TYPE_SMC,
5517 enable ?
5518 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5520 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5521 }
5523 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5524 bool enable)
5525 {
5526 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5527 }
5529 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5530 bool enable)
5531 {
5532 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5533 }
5535 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5536 bool enable)
5537 {
5538 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5539 }
5541 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5542 bool enable)
5543 {
5544 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5546 /* Read any GFX register to wake up GFX. */
5547 if (enable)
5548 RREG32(mmDB_RENDER_CONTROL);
5549 }
5551 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5552 bool enable)
5553 {
5554 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5555 cz_enable_gfx_cg_power_gating(adev, true);
5556 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5557 cz_enable_gfx_pipeline_power_gating(adev, true);
5558 } else {
5559 cz_enable_gfx_cg_power_gating(adev, false);
5560 cz_enable_gfx_pipeline_power_gating(adev, false);
5561 }
5562 }
5564 static int gfx_v8_0_set_powergating_state(void *handle,
5565 enum amd_powergating_state state)
5566 {
5567 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5568 bool enable = (state == AMD_PG_STATE_GATE);
5570 if (amdgpu_sriov_vf(adev))
5571 return 0;
5573 switch (adev->asic_type) {
5574 case CHIP_CARRIZO:
5575 case CHIP_STONEY:
5577 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5578 cz_enable_sck_slow_down_on_power_up(adev, true);
5579 cz_enable_sck_slow_down_on_power_down(adev, true);
5580 } else {
5581 cz_enable_sck_slow_down_on_power_up(adev, false);
5582 cz_enable_sck_slow_down_on_power_down(adev, false);
5583 }
5584 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5585 cz_enable_cp_power_gating(adev, true);
5586 else
5587 cz_enable_cp_power_gating(adev, false);
5589 cz_update_gfx_cg_power_gating(adev, enable);
5591 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5592 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5593 else
5594 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5596 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5597 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5598 else
5599 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5600 break;
5601 case CHIP_POLARIS11:
5602 case CHIP_POLARIS12:
5603 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5604 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5605 else
5606 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5608 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5609 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5610 else
5611 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5613 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5614 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5615 else
5616 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5617 break;
5618 default:
5619 break;
5620 }
5622 return 0;
5623 }
5625 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5626 {
5627 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5628 int data;
5630 if (amdgpu_sriov_vf(adev))
5631 *flags = 0;
5633 /* AMD_CG_SUPPORT_GFX_MGCG */
5634 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5635 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5636 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5638 /* AMD_CG_SUPPORT_GFX_CGCG */
5639 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5640 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5641 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5643 /* AMD_CG_SUPPORT_GFX_CGLS */
5644 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5645 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5647 /* AMD_CG_SUPPORT_GFX_CGTS */
5648 data = RREG32(mmCGTS_SM_CTRL_REG);
5649 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5650 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5652 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5653 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5654 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5656 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5657 data = RREG32(mmRLC_MEM_SLP_CNTL);
5658 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5659 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5661 /* AMD_CG_SUPPORT_GFX_CP_LS */
5662 data = RREG32(mmCP_MEM_SLP_CNTL);
5663 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5664 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5665 }
5667 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5668 uint32_t reg_addr, uint32_t cmd)
5669 {
5670 uint32_t data;
5672 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5674 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5675 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5677 data = RREG32(mmRLC_SERDES_WR_CTRL);
5678 if (adev->asic_type == CHIP_STONEY)
5679 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5680 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5681 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5682 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5683 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5684 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5685 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5686 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5687 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5688 else
5689 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5690 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5691 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5692 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5693 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5694 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5695 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5696 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5697 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5698 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5699 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5700 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5701 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5702 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5703 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5705 WREG32(mmRLC_SERDES_WR_CTRL, data);
5706 }
5708 #define MSG_ENTER_RLC_SAFE_MODE 1
5709 #define MSG_EXIT_RLC_SAFE_MODE 0
5710 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5711 #define RLC_GPR_REG2__REQ__SHIFT 0
5712 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5713 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
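/* Ask the RLC firmware to enter safe mode before touching clock-gating
 * state: write the CMD bit plus the enter message to RLC_SAFE_MODE, then
 * poll until the GFX clock/power status and the CMD ack come back. This is
 * skipped entirely when the RLC F32 core is not running.
 */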
5715 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5716 {
5717 uint32_t data;
5718 unsigned i;
5720 data = RREG32(mmRLC_CNTL);
5721 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5722 return;
5724 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5725 data |= RLC_SAFE_MODE__CMD_MASK;
5726 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5727 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5728 WREG32(mmRLC_SAFE_MODE, data);
5730 for (i = 0; i < adev->usec_timeout; i++) {
5731 if ((RREG32(mmRLC_GPM_STAT) &
5732 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5733 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5734 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5735 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5736 break;
5737 udelay(1);
5738 }
5740 for (i = 0; i < adev->usec_timeout; i++) {
5741 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5742 break;
5743 udelay(1);
5744 }
5745 adev->gfx.rlc.in_safe_mode = true;
5746 }
5747 }
5749 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5750 {
5751 uint32_t data;
5752 unsigned i;
5754 data = RREG32(mmRLC_CNTL);
5755 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5756 return;
5758 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5759 if (adev->gfx.rlc.in_safe_mode) {
5760 data |= RLC_SAFE_MODE__CMD_MASK;
5761 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5762 WREG32(mmRLC_SAFE_MODE, data);
5763 adev->gfx.rlc.in_safe_mode = false;
5764 }
5765 }
5767 for (i = 0; i < adev->usec_timeout; i++) {
5768 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5769 break;
5770 udelay(1);
5771 }
5772 }
5774 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5775 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5776 .exit_safe_mode = iceland_exit_rlc_safe_mode
5777 };
5779 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5780 bool enable)
5781 {
5782 uint32_t temp, data;
5784 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5786 /* It is disabled by HW by default */
5787 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5788 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5789 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5790 /* 1 - RLC memory Light sleep */
5791 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5793 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5794 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5795 }
5797 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5798 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5799 if (adev->flags & AMD_IS_APU)
5800 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5801 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5802 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5803 else
5804 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5805 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5806 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5807 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5809 if (temp != data)
5810 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5812 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5813 gfx_v8_0_wait_for_rlc_serdes(adev);
5815 /* 5 - clear mgcg override */
5816 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5818 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5819 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5820 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5821 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5822 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5823 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5824 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5825 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5826 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5827 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5828 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5829 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5830 if (temp != data)
5831 WREG32(mmCGTS_SM_CTRL_REG, data);
5832 }
5835 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5836 gfx_v8_0_wait_for_rlc_serdes(adev);
5837 } else {
5838 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5839 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5840 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5841 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5842 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5843 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5844 if (temp != data)
5845 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5847 /* 2 - disable MGLS in RLC */
5848 data = RREG32(mmRLC_MEM_SLP_CNTL);
5849 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5850 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5851 WREG32(mmRLC_MEM_SLP_CNTL, data);
5852 }
5854 /* 3 - disable MGLS in CP */
5855 data = RREG32(mmCP_MEM_SLP_CNTL);
5856 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5857 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5858 WREG32(mmCP_MEM_SLP_CNTL, data);
5859 }
5861 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5862 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5863 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5864 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5865 if (temp != data)
5866 WREG32(mmCGTS_SM_CTRL_REG, data);
5868 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5869 gfx_v8_0_wait_for_rlc_serdes(adev);
5871 /* 6 - set mgcg override */
5872 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5874 udelay(50);
5876 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5877 gfx_v8_0_wait_for_rlc_serdes(adev);
5878 }
5880 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5881 }
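/* Coarse-grain gating (CGCG/CGLS) is sequenced through the serdes command
 * interface: the BPM overrides are set or cleared via
 * gfx_v8_0_send_serdes_cmd() and every step waits for the CU/non-CU serdes
 * masters to go idle before RLC_CGCG_CGLS_CTRL is finally reprogrammed.
 */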
5883 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5884 bool enable)
5885 {
5886 uint32_t temp, temp1, data, data1;
5888 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5890 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5892 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5893 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5894 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5895 if (temp1 != data1)
5896 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5898 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5899 gfx_v8_0_wait_for_rlc_serdes(adev);
5901 /* 2 - clear cgcg override */
5902 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5904 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5905 gfx_v8_0_wait_for_rlc_serdes(adev);
5907 /* 3 - write cmd to set CGLS */
5908 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5910 /* 4 - enable cgcg */
5911 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5913 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5914 /* enable cgls */
5915 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5917 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5918 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5920 if (temp1 != data1)
5921 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5922 } else {
5923 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5924 }
5926 if (temp != data)
5927 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5929 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5930 * Cmp_busy/GFX_Idle interrupts
5931 */
5932 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5933 } else {
5934 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5935 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5937 /* TEST CGCG */
5938 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5939 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5940 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5941 if (temp1 != data1)
5942 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5944 /* read gfx register to wake up cgcg */
5945 RREG32(mmCB_CGTT_SCLK_CTRL);
5946 RREG32(mmCB_CGTT_SCLK_CTRL);
5947 RREG32(mmCB_CGTT_SCLK_CTRL);
5948 RREG32(mmCB_CGTT_SCLK_CTRL);
5950 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5951 gfx_v8_0_wait_for_rlc_serdes(adev);
5953 /* write cmd to Set CGCG Override */
5954 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5956 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5957 gfx_v8_0_wait_for_rlc_serdes(adev);
5959 /* write cmd to Clear CGLS */
5960 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5962 /* disable cgcg, cgls should be disabled too. */
5963 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5964 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5965 if (temp != data)
5966 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5967 /* enable interrupts again for PG */
5968 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5969 }
5971 gfx_v8_0_wait_for_rlc_serdes(adev);
5973 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5974 }
5975 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5976 bool enable)
5977 {
5978 if (enable) {
5979 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5980 * === MGCG + MGLS + TS(CG/LS) ===
5981 */
5982 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5983 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5984 } else {
5985 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5986 * === CGCG + CGLS ===
5987 */
5988 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5989 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5990 }
5991 return 0;
5992 }
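/* On Tonga and Polaris, clock gating is not programmed directly; each
 * block's CG/LS request is packed with PP_CG_MSG_ID() and handed to the
 * SMU, following the pattern used throughout the two helpers below:
 *
 *	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, PP_BLOCK_GFX_CG,
 *			      pp_support_state, pp_state);
 *	amd_set_clockgating_by_smu(pp_handle, msg_id);
 */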
5994 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5995 enum amd_clockgating_state state)
5996 {
5997 uint32_t msg_id, pp_state = 0;
5998 uint32_t pp_support_state = 0;
5999 void *pp_handle = adev->powerplay.pp_handle;
6001 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6002 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6003 pp_support_state = PP_STATE_SUPPORT_LS;
6004 pp_state = PP_STATE_LS;
6005 }
6006 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6007 pp_support_state |= PP_STATE_SUPPORT_CG;
6008 pp_state |= PP_STATE_CG;
6009 }
6010 if (state == AMD_CG_STATE_UNGATE)
6011 pp_state = 0;
6013 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6014 PP_BLOCK_GFX_CG,
6015 pp_support_state,
6016 pp_state);
6017 amd_set_clockgating_by_smu(pp_handle, msg_id);
6018 }
6020 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6021 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6022 pp_support_state = PP_STATE_SUPPORT_LS;
6023 pp_state = PP_STATE_LS;
6024 }
6026 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6027 pp_support_state |= PP_STATE_SUPPORT_CG;
6028 pp_state |= PP_STATE_CG;
6029 }
6031 if (state == AMD_CG_STATE_UNGATE)
6032 pp_state = 0;
6034 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6035 PP_BLOCK_GFX_MG,
6036 pp_support_state,
6037 pp_state);
6038 amd_set_clockgating_by_smu(pp_handle, msg_id);
6039 }
6041 return 0;
6042 }
6044 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6045 enum amd_clockgating_state state)
6046 {
6048 uint32_t msg_id, pp_state = 0;
6049 uint32_t pp_support_state = 0;
6050 void *pp_handle = adev->powerplay.pp_handle;
6052 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6053 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6054 pp_support_state = PP_STATE_SUPPORT_LS;
6055 pp_state = PP_STATE_LS;
6056 }
6057 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6058 pp_support_state |= PP_STATE_SUPPORT_CG;
6059 pp_state |= PP_STATE_CG;
6060 }
6061 if (state == AMD_CG_STATE_UNGATE)
6062 pp_state = 0;
6064 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6065 PP_BLOCK_GFX_CG,
6066 pp_support_state,
6067 pp_state);
6068 amd_set_clockgating_by_smu(pp_handle, msg_id);
6069 }
6071 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6072 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6073 pp_support_state = PP_STATE_SUPPORT_LS;
6074 pp_state = PP_STATE_LS;
6075 }
6076 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6077 pp_support_state |= PP_STATE_SUPPORT_CG;
6078 pp_state |= PP_STATE_CG;
6079 }
6080 if (state == AMD_CG_STATE_UNGATE)
6081 pp_state = 0;
6083 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6084 PP_BLOCK_GFX_3D,
6085 pp_support_state,
6086 pp_state);
6087 amd_set_clockgating_by_smu(pp_handle, msg_id);
6088 }
6090 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6091 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6092 pp_support_state = PP_STATE_SUPPORT_LS;
6093 pp_state = PP_STATE_LS;
6094 }
6096 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6097 pp_support_state |= PP_STATE_SUPPORT_CG;
6098 pp_state |= PP_STATE_CG;
6099 }
6101 if (state == AMD_CG_STATE_UNGATE)
6102 pp_state = 0;
6104 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6105 PP_BLOCK_GFX_MG,
6106 pp_support_state,
6107 pp_state);
6108 amd_set_clockgating_by_smu(pp_handle, msg_id);
6109 }
6111 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6112 pp_support_state = PP_STATE_SUPPORT_LS;
6114 if (state == AMD_CG_STATE_UNGATE)
6115 pp_state = 0;
6116 else
6117 pp_state = PP_STATE_LS;
6119 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6120 PP_BLOCK_GFX_RLC,
6121 pp_support_state,
6122 pp_state);
6123 amd_set_clockgating_by_smu(pp_handle, msg_id);
6124 }
6126 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6127 pp_support_state = PP_STATE_SUPPORT_LS;
6129 if (state == AMD_CG_STATE_UNGATE)
6130 pp_state = 0;
6131 else
6132 pp_state = PP_STATE_LS;
6133 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6134 PP_BLOCK_GFX_CP,
6135 pp_support_state,
6136 pp_state);
6137 amd_set_clockgating_by_smu(pp_handle, msg_id);
6138 }
6140 return 0;
6141 }
6143 static int gfx_v8_0_set_clockgating_state(void *handle,
6144 enum amd_clockgating_state state)
6145 {
6146 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6148 if (amdgpu_sriov_vf(adev))
6149 return 0;
6151 switch (adev->asic_type) {
6152 case CHIP_FIJI:
6153 case CHIP_CARRIZO:
6154 case CHIP_STONEY:
6155 gfx_v8_0_update_gfx_clock_gating(adev,
6156 state == AMD_CG_STATE_GATE);
6157 break;
6158 case CHIP_TONGA:
6159 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6160 break;
6161 case CHIP_POLARIS10:
6162 case CHIP_POLARIS11:
6163 case CHIP_POLARIS12:
6164 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6165 break;
6166 default:
6167 break;
6168 }
6169 return 0;
6170 }
6172 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6173 {
6174 return ring->adev->wb.wb[ring->rptr_offs];
6175 }
6177 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6178 {
6179 struct amdgpu_device *adev = ring->adev;
6181 if (ring->use_doorbell)
6182 /* XXX check if swapping is necessary on BE */
6183 return ring->adev->wb.wb[ring->wptr_offs];
6184 else
6185 return RREG32(mmCP_RB0_WPTR);
6186 }
6188 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6189 {
6190 struct amdgpu_device *adev = ring->adev;
6192 if (ring->use_doorbell) {
6193 /* XXX check if swapping is necessary on BE */
6194 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6195 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6196 } else {
6197 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6198 (void)RREG32(mmCP_RB0_WPTR);
6199 }
6200 }
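/* Emit an HDP flush as a WAIT_REG_MEM "write, wait, write" sequence: the
 * CP writes the per-engine ref/mask to GPU_HDP_FLUSH_REQ and then polls
 * GPU_HDP_FLUSH_DONE until the matching bit is set.
 */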
6202 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6203 {
6204 u32 ref_and_mask, reg_mem_engine;
6206 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6207 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6208 switch (ring->me) {
6209 case 1:
6210 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6211 break;
6212 case 2:
6213 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6214 break;
6215 default:
6216 return;
6217 }
6218 reg_mem_engine = 0;
6219 } else {
6220 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6221 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6222 }
6224 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6225 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6226 WAIT_REG_MEM_FUNCTION(3) | /* == */
6227 reg_mem_engine));
6228 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6229 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6230 amdgpu_ring_write(ring, ref_and_mask);
6231 amdgpu_ring_write(ring, ref_and_mask);
6232 amdgpu_ring_write(ring, 0x20); /* poll interval */
6233 }
6235 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6236 {
6237 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6238 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6239 EVENT_INDEX(4));
6241 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6242 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6243 EVENT_INDEX(0));
6244 }
6247 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6248 {
6249 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6250 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6251 WRITE_DATA_DST_SEL(0) |
6252 WR_CONFIRM));
6253 amdgpu_ring_write(ring, mmHDP_DEBUG0);
6254 amdgpu_ring_write(ring, 0);
6255 amdgpu_ring_write(ring, 1);
6256 }
6259 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6260 struct amdgpu_ib *ib,
6261 unsigned vm_id, bool ctx_switch)
6262 {
6263 u32 header, control = 0;
6265 if (ib->flags & AMDGPU_IB_FLAG_CE)
6266 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6267 else
6268 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6270 control |= ib->length_dw | (vm_id << 24);
6272 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6273 control |= INDIRECT_BUFFER_PRE_ENB(1);
6275 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6276 gfx_v8_0_ring_emit_de_meta(ring);
6277 }
6279 amdgpu_ring_write(ring, header);
6280 amdgpu_ring_write(ring,
6281 #ifdef __BIG_ENDIAN
6282 (2 << 0) |
6283 #endif
6284 (ib->gpu_addr & 0xFFFFFFFC));
6285 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6286 amdgpu_ring_write(ring, control);
6287 }
6289 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6290 struct amdgpu_ib *ib,
6291 unsigned vm_id, bool ctx_switch)
6292 {
6293 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6295 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6296 amdgpu_ring_write(ring,
6297 #ifdef __BIG_ENDIAN
6298 (2 << 0) |
6299 #endif
6300 (ib->gpu_addr & 0xFFFFFFFC));
6301 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6302 amdgpu_ring_write(ring, control);
6303 }
6305 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6306 u64 seq, unsigned flags)
6307 {
6308 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6309 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6311 /* EVENT_WRITE_EOP - flush caches, send int */
6312 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6313 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6314 EOP_TC_ACTION_EN |
6315 EOP_TC_WB_ACTION_EN |
6316 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6317 EVENT_INDEX(5)));
6318 amdgpu_ring_write(ring, addr & 0xfffffffc);
6319 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6320 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6321 amdgpu_ring_write(ring, lower_32_bits(seq));
6322 amdgpu_ring_write(ring, upper_32_bits(seq));
6324 }
6326 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6327 {
6328 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6329 uint32_t seq = ring->fence_drv.sync_seq;
6330 uint64_t addr = ring->fence_drv.gpu_addr;
6332 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6333 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6334 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6335 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6336 amdgpu_ring_write(ring, addr & 0xfffffffc);
6337 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6338 amdgpu_ring_write(ring, seq);
6339 amdgpu_ring_write(ring, 0xffffffff);
6340 amdgpu_ring_write(ring, 4); /* poll interval */
6341 }
6343 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6344 unsigned vm_id, uint64_t pd_addr)
6345 {
6346 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6348 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6349 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6350 WRITE_DATA_DST_SEL(0)) |
6351 WR_CONFIRM);
6352 if (vm_id < 8) {
6353 amdgpu_ring_write(ring,
6354 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6355 } else {
6356 amdgpu_ring_write(ring,
6357 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6358 }
6359 amdgpu_ring_write(ring, 0);
6360 amdgpu_ring_write(ring, pd_addr >> 12);
6362 /* bits 0-15 are the VM contexts0-15 */
6363 /* invalidate the cache */
6364 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6365 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6366 WRITE_DATA_DST_SEL(0)));
6367 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6368 amdgpu_ring_write(ring, 0);
6369 amdgpu_ring_write(ring, 1 << vm_id);
6371 /* wait for the invalidate to complete */
6372 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6373 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6374 WAIT_REG_MEM_FUNCTION(0) | /* always */
6375 WAIT_REG_MEM_ENGINE(0))); /* me */
6376 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6377 amdgpu_ring_write(ring, 0);
6378 amdgpu_ring_write(ring, 0); /* ref */
6379 amdgpu_ring_write(ring, 0); /* mask */
6380 amdgpu_ring_write(ring, 0x20); /* poll interval */
6382 /* compute doesn't have PFP */
6383 if (usepfp) {
6384 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6385 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6386 amdgpu_ring_write(ring, 0x0);
6387 }
6388 }
6390 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6391 {
6392 return ring->adev->wb.wb[ring->wptr_offs];
6393 }
6395 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6396 {
6397 struct amdgpu_device *adev = ring->adev;
6399 /* XXX check if swapping is necessary on BE */
6400 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6401 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6402 }
6404 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6405 u64 addr, u64 seq,
6406 unsigned flags)
6407 {
6408 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6409 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6411 /* RELEASE_MEM - flush caches, send int */
6412 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6413 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6414 EOP_TC_ACTION_EN |
6415 EOP_TC_WB_ACTION_EN |
6416 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6417 EVENT_INDEX(5)));
6418 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6419 amdgpu_ring_write(ring, addr & 0xfffffffc);
6420 amdgpu_ring_write(ring, upper_32_bits(addr));
6421 amdgpu_ring_write(ring, lower_32_bits(seq));
6422 amdgpu_ring_write(ring, upper_32_bits(seq));
6423 }
6425 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6426 u64 seq, unsigned int flags)
6427 {
6428 /* we only allocate 32bit for each seq wb address */
6429 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6431 /* write fence seq to the "addr" */
6432 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6433 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6434 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6435 amdgpu_ring_write(ring, lower_32_bits(addr));
6436 amdgpu_ring_write(ring, upper_32_bits(addr));
6437 amdgpu_ring_write(ring, lower_32_bits(seq));
6439 if (flags & AMDGPU_FENCE_FLAG_INT) {
6440 /* set register to trigger INT */
6441 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6442 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6443 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6444 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6445 amdgpu_ring_write(ring, 0);
6446 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6447 }
6448 }
6450 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6451 {
6452 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6453 amdgpu_ring_write(ring, 0);
6454 }
6456 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6457 {
6458 uint32_t dw2 = 0;
6460 if (amdgpu_sriov_vf(ring->adev))
6461 gfx_v8_0_ring_emit_ce_meta(ring);
6463 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
6464 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6465 gfx_v8_0_ring_emit_vgt_flush(ring);
6466 /* set load_global_config & load_global_uconfig */
6467 dw2 |= 0x8001;
6468 /* set load_cs_sh_regs */
6469 dw2 |= 0x01000000;
6470 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6471 dw2 |= 0x10002;
6473 /* set load_ce_ram if preamble presented */
6474 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6475 dw2 |= 0x10000000;
6476 } else {
6477 /* still load_ce_ram if this is the first time preamble presented
6478 * although there is no context switch happens.
6479 */
6480 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6481 dw2 |= 0x10000000;
6482 }
6484 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6485 amdgpu_ring_write(ring, dw2);
6486 amdgpu_ring_write(ring, 0);
6487 }
6489 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6490 {
6491 unsigned ret;
6493 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6494 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6495 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6496 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6497 ret = ring->wptr & ring->buf_mask;
6498 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6500 return ret;
6501 }
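/* Patch the dummy dword emitted by gfx_v8_0_ring_emit_init_cond_exec()
 * with the real count. For example, with offset 10 and the write pointer
 * now at 16, the patched value is (16 - 1) - 10 = 5 dwords that the CP may
 * conditionally discard; the else branch handles write-pointer wrap-around.
 */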
6502 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6503 {
6504 unsigned cur;
6506 BUG_ON(offset > ring->buf_mask);
6507 BUG_ON(ring->ring[offset] != 0x55aa55aa);
6509 cur = (ring->wptr & ring->buf_mask) - 1;
6510 if (likely(cur > offset))
6511 ring->ring[offset] = cur - offset;
6512 else
6513 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6514 }
6516 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6517 {
6518 struct amdgpu_device *adev = ring->adev;
6520 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6521 amdgpu_ring_write(ring, 0 | /* src: register*/
6522 (5 << 8) | /* dst: memory */
6523 (1 << 20)); /* write confirm */
6524 amdgpu_ring_write(ring, reg);
6525 amdgpu_ring_write(ring, 0);
6526 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6527 adev->virt.reg_val_offs * 4));
6528 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6529 adev->virt.reg_val_offs * 4));
6530 }
6532 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6533 uint32_t val)
6534 {
6535 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6536 amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
6537 amdgpu_ring_write(ring, reg);
6538 amdgpu_ring_write(ring, 0);
6539 amdgpu_ring_write(ring, val);
6540 }
6542 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6543 enum amdgpu_interrupt_state state)
6544 {
6545 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6546 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6547 }
6549 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6550 int me, int pipe,
6551 enum amdgpu_interrupt_state state)
6552 {
6553 u32 mec_int_cntl, mec_int_cntl_reg;
6555 /*
6556 * amdgpu controls only the first MEC. That's why this function only
6557 * handles the setting of interrupts for this specific MEC. All other
6558 * pipes' interrupts are set by amdkfd.
6559 */
6561 if (me == 1) {
6562 switch (pipe) {
6563 case 0:
6564 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6565 break;
6566 case 1:
6567 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6568 break;
6569 case 2:
6570 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6571 break;
6572 case 3:
6573 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6574 break;
6575 default:
6576 DRM_DEBUG("invalid pipe %d\n", pipe);
6577 return;
6578 }
6579 } else {
6580 DRM_DEBUG("invalid me %d\n", me);
6581 return;
6582 }
6584 switch (state) {
6585 case AMDGPU_IRQ_STATE_DISABLE:
6586 mec_int_cntl = RREG32(mec_int_cntl_reg);
6587 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6588 WREG32(mec_int_cntl_reg, mec_int_cntl);
6589 break;
6590 case AMDGPU_IRQ_STATE_ENABLE:
6591 mec_int_cntl = RREG32(mec_int_cntl_reg);
6592 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6593 WREG32(mec_int_cntl_reg, mec_int_cntl);
6594 break;
6595 default:
6596 break;
6597 }
6598 }
6600 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6601 struct amdgpu_irq_src *source,
6602 unsigned type,
6603 enum amdgpu_interrupt_state state)
6604 {
6605 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6606 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6608 return 0;
6609 }
6611 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6612 struct amdgpu_irq_src *source,
6613 unsigned type,
6614 enum amdgpu_interrupt_state state)
6615 {
6616 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6617 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6619 return 0;
6620 }
6622 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6623 struct amdgpu_irq_src *src,
6624 unsigned type,
6625 enum amdgpu_interrupt_state state)
6626 {
6627 switch (type) {
6628 case AMDGPU_CP_IRQ_GFX_EOP:
6629 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6630 break;
6631 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6632 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6633 break;
6634 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6635 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6636 break;
6637 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6638 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6639 break;
6640 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6641 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6642 break;
6643 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6644 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6645 break;
6646 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6647 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6648 break;
6649 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6650 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6651 break;
6652 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6653 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6654 break;
6655 default:
6656 break;
6657 }
6658 return 0;
6659 }
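/* Fence EOP interrupt handler. The IH ring_id encodes the source queue:
 * me in bits 3:2, pipe in bits 1:0 and queue in bits 6:4. me 0 is the GFX
 * ring; me 1/2 are matched against the compute rings' me/pipe/queue.
 */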
6661 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6662 struct amdgpu_irq_src *source,
6663 struct amdgpu_iv_entry *entry)
6664 {
6665 int i;
6666 u8 me_id, pipe_id, queue_id;
6667 struct amdgpu_ring *ring;
6669 DRM_DEBUG("IH: CP EOP\n");
6670 me_id = (entry->ring_id & 0x0c) >> 2;
6671 pipe_id = (entry->ring_id & 0x03) >> 0;
6672 queue_id = (entry->ring_id & 0x70) >> 4;
6674 switch (me_id) {
6675 case 0:
6676 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6677 break;
6678 case 1:
6679 case 2:
6680 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6681 ring = &adev->gfx.compute_ring[i];
6682 /* Per-queue interrupt is supported for MEC starting from VI.
6683 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6684 */
6685 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6686 amdgpu_fence_process(ring);
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

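/*
 * KIQ (kernel interface queue) interrupts are delivered via the GENERIC2
 * interrupt of CPC and of the ME pipe that owns the KIQ ring; other request
 * types are not expected here (hence the BUG() below).
 */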
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

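/*
 * The .emit_frame_size totals below are the worst-case number of dwords each
 * emit helper may write for one submission, so enough ring space can be
 * reserved up front; the per-item counts mirror the individual emit
 * functions.
 */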
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
			the first COND_EXEC jumps to the point just
			prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

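/*
 * GDS is the on-chip global data share. The total memory size is read back
 * from the VMID0 size register, the GWS and OA counts are fixed on VI, and
 * the gfx/CS partition split below depends on whether the part exposes 64KB
 * of GDS memory.
 */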
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC GDS info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

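/*
 * Apply a user-requested inactive-CU bitmap (parsed from the disable_cu
 * module option by amdgpu_gfx_parse_disable_cu()) on top of the harvest
 * configuration fused into CC_GC_SHADER_ARRAY_CONFIG.
 */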
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

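/*
 * Walk every shader engine/array, record which CUs are active and build the
 * always-on (AO) CU mask; a CU counts as always-on only while the per-array
 * AO budget (ao_cu_num) has not been used up.
 */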
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

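/*
 * CE/DE metadata used when running under SR-IOV: the payloads are written
 * into what is assumed to be the context save area mapped within the VA
 * range the driver reserves (AMDGPU_VA_RESERVED_SIZE), using the chained-IB
 * layout when the host supports chained IBs.
 */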
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}

static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}