/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"
#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK		0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK		0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK		0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK		0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK		0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK		0x00000020L

#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

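/* per-VMID offsets of the GDS base/size, GWS and OA registers (VMIDs 0-15) */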
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

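/*
 * The "golden" register tables below are {register, AND mask, OR value}
 * triplets; amdgpu_program_register_sequence() read-modify-writes each
 * register as (reg & ~mask) | value.
 */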
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

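/* apply the per-ASIC "golden" (validated) register settings at init time */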
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

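/*
 * Basic ring test: write 0xDEADBEEF to a scratch register through the
 * ring and poll until the value lands, proving the CP can fetch and
 * execute packets.
 */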
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

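/*
 * IB test: same scratch-register handshake, but submitted through an
 * indirect buffer and synchronized with a fence.
 */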
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

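/*
 * Fetch the CP (PFP/ME/CE/MEC/MEC2) and RLC microcode images from
 * /lib/firmware. Polaris parts first try the "_2" firmware variants and
 * fall back to the original names if those are absent.
 */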
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}
	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version 46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the CP JT (jump table) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
1170 "gfx8: Failed to load firmware \"%s\"\n",
1172 release_firmware(adev->gfx.pfp_fw);
1173 adev->gfx.pfp_fw = NULL;
1174 release_firmware(adev->gfx.me_fw);
1175 adev->gfx.me_fw = NULL;
1176 release_firmware(adev->gfx.ce_fw);
1177 adev->gfx.ce_fw = NULL;
1178 release_firmware(adev->gfx.rlc_fw);
1179 adev->gfx.rlc_fw = NULL;
1180 release_firmware(adev->gfx.mec_fw);
1181 adev->gfx.mec_fw = NULL;
1182 release_firmware(adev->gfx.mec2_fw);
1183 adev->gfx.mec2_fw = NULL;
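/*
 * Build the RLC clear-state buffer: preamble and context-control packets,
 * the SECT_CONTEXT register extents from the cs_data table, the per-board
 * raster config, and a trailing CLEAR_STATE packet.
 */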
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

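/*
 * Copy the jump tables (JT) embedded in the CE/PFP/ME/MEC/MEC2 microcode
 * images into the RLC cp_table buffer object, back to back; the me index
 * selects which firmware image is copied.
 */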
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

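/* allocate the clear-state BO and, on CZ/Stoney, the CP jump-table BO */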
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

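/* allocate the MEC HPD EOP buffer: GFX8_MEC_HPD_SIZE bytes per compute ring */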
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

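/*
 * Raw GCN machine code used by the EDC workaround below: the first shader
 * consists of v_mov_b32 encodings that write the VGPRs, the second of
 * s_mov_b32 encodings that write the SGPRs, each ending with an
 * s_barrier/s_endpgm pair.
 */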
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

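/*
 * SEC/DED (single-error-correct / double-error-detect) EDC counter
 * registers; the GPR init workaround reads these back afterwards, which
 * clears the counts.
 */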
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};

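/*
 * EDC GPR workaround: dispatch compute shaders that write every VGPR and
 * SGPR so the register files start from a known state; otherwise the EDC
 * logic could report spurious SEC/DED errors for never-written GPRs.
 */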
1548 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1550 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1551 struct amdgpu_ib ib;
1552 struct dma_fence *f = NULL;
1555 unsigned total_size, vgpr_offset, sgpr_offset;
1558 /* only supported on CZ */
1559 if (adev->asic_type != CHIP_CARRIZO)
1562 /* bail if the compute ring is not ready */
1566 tmp = RREG32(mmGB_EDC_MODE);
1567 WREG32(mmGB_EDC_MODE, 0);
1570 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1572 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1574 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1575 total_size = ALIGN(total_size, 256);
1576 vgpr_offset = total_size;
1577 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1578 sgpr_offset = total_size;
1579 total_size += sizeof(sgpr_init_compute_shader);
1581 /* allocate an indirect buffer to put the commands in */
1582 memset(&ib, 0, sizeof(ib));
1583 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1585 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1589 /* load the compute shaders */
1590 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1591 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1593 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1594 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1596 /* init the ib length to 0 */
1597 ib.length_dw = 0;
1600 /* write the register state for the compute dispatch */
1601 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1602 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1603 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1604 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1605 }
1606 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1607 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1608 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1609 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1610 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1611 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1613 /* write dispatch packet */
1614 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1615 ib.ptr[ib.length_dw++] = 8; /* x */
1616 ib.ptr[ib.length_dw++] = 1; /* y */
1617 ib.ptr[ib.length_dw++] = 1; /* z */
1618 ib.ptr[ib.length_dw++] =
1619 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1621 /* write CS partial flush packet */
1622 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1623 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
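/*
 * The same SET_SH_REG / COMPUTE_PGM_LO / DISPATCH_DIRECT / EVENT_WRITE
 * sequence is repeated below for the two SGPR-init dispatches.
 * EVENT_TYPE(7) with EVENT_INDEX(4) is the CS partial flush event named
 * in the comment above (encoding assumed from the gfx8 PM4 packet spec).
 */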
1626 /* write the register state for the compute dispatch */
1627 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1628 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1629 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1630 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1631 }
1632 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1633 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1634 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1635 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1636 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1637 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1639 /* write dispatch packet */
1640 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1641 ib.ptr[ib.length_dw++] = 8; /* x */
1642 ib.ptr[ib.length_dw++] = 1; /* y */
1643 ib.ptr[ib.length_dw++] = 1; /* z */
1644 ib.ptr[ib.length_dw++] =
1645 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1647 /* write CS partial flush packet */
1648 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1649 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1652 /* write the register state for the compute dispatch */
1653 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1654 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1655 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1656 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1657 }
1658 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1659 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1660 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1661 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1662 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1663 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1665 /* write dispatch packet */
1666 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1667 ib.ptr[ib.length_dw++] = 8; /* x */
1668 ib.ptr[ib.length_dw++] = 1; /* y */
1669 ib.ptr[ib.length_dw++] = 1; /* z */
1670 ib.ptr[ib.length_dw++] =
1671 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1673 /* write CS partial flush packet */
1674 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1675 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1677 /* schedule the ib on the ring */
1678 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1679 if (r) {
1680 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1681 goto fail;
1682 }
1684 /* wait for the GPU to finish processing the IB */
1685 r = dma_fence_wait(f, false);
1686 if (r) {
1687 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1688 goto fail;
1689 }
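/*
 * Re-arm EDC after the GPR-clear dispatches: GB_EDC_MODE was zeroed
 * before the IB ran, and DIS_EDC is cleared below (field semantics
 * per the gfx8 GCA register spec).
 */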
1691 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1692 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1693 WREG32(mmGB_EDC_MODE, tmp);
1695 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1696 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1697 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1700 /* read back registers to clear the counters */
1701 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1702 RREG32(sec_ded_counter_registers[i]);
1704 fail:
1705 amdgpu_ib_free(adev, &ib, NULL);
1706 dma_fence_put(f);
1708 return r;
1709 }
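/**
 * gfx_v8_0_gpu_early_init - set up the gfx configuration
 *
 * @adev: amdgpu_device pointer
 *
 * Fills adev->gfx.config with per-ASIC limits (shader engines, tile
 * pipes, CUs per SH, backends, caches) and derives gb_addr_config and
 * the memory row size from the memory controller fuses.
 */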
1711 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1712 {
1713 u32 gb_addr_config;
1714 u32 mc_shared_chmap, mc_arb_ramcfg;
1715 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1716 u32 tmp;
1717 int ret;
1719 switch (adev->asic_type) {
1720 case CHIP_TOPAZ:
1721 adev->gfx.config.max_shader_engines = 1;
1722 adev->gfx.config.max_tile_pipes = 2;
1723 adev->gfx.config.max_cu_per_sh = 6;
1724 adev->gfx.config.max_sh_per_se = 1;
1725 adev->gfx.config.max_backends_per_se = 2;
1726 adev->gfx.config.max_texture_channel_caches = 2;
1727 adev->gfx.config.max_gprs = 256;
1728 adev->gfx.config.max_gs_threads = 32;
1729 adev->gfx.config.max_hw_contexts = 8;
1731 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1732 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1733 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1734 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1735 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1736 break;
1737 case CHIP_FIJI:
1738 adev->gfx.config.max_shader_engines = 4;
1739 adev->gfx.config.max_tile_pipes = 16;
1740 adev->gfx.config.max_cu_per_sh = 16;
1741 adev->gfx.config.max_sh_per_se = 1;
1742 adev->gfx.config.max_backends_per_se = 4;
1743 adev->gfx.config.max_texture_channel_caches = 16;
1744 adev->gfx.config.max_gprs = 256;
1745 adev->gfx.config.max_gs_threads = 32;
1746 adev->gfx.config.max_hw_contexts = 8;
1748 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1749 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1750 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1751 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1752 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1753 break;
1754 case CHIP_POLARIS11:
1755 case CHIP_POLARIS12:
1756 ret = amdgpu_atombios_get_gfx_info(adev);
1757 if (ret)
1758 return ret;
1759 adev->gfx.config.max_gprs = 256;
1760 adev->gfx.config.max_gs_threads = 32;
1761 adev->gfx.config.max_hw_contexts = 8;
1763 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1764 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1765 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1766 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1767 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1768 break;
1769 case CHIP_POLARIS10:
1770 ret = amdgpu_atombios_get_gfx_info(adev);
1771 if (ret)
1772 return ret;
1773 adev->gfx.config.max_gprs = 256;
1774 adev->gfx.config.max_gs_threads = 32;
1775 adev->gfx.config.max_hw_contexts = 8;
1777 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1778 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1779 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1780 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1781 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1782 break;
1783 case CHIP_TONGA:
1784 adev->gfx.config.max_shader_engines = 4;
1785 adev->gfx.config.max_tile_pipes = 8;
1786 adev->gfx.config.max_cu_per_sh = 8;
1787 adev->gfx.config.max_sh_per_se = 1;
1788 adev->gfx.config.max_backends_per_se = 2;
1789 adev->gfx.config.max_texture_channel_caches = 8;
1790 adev->gfx.config.max_gprs = 256;
1791 adev->gfx.config.max_gs_threads = 32;
1792 adev->gfx.config.max_hw_contexts = 8;
1794 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1799 break;
1800 case CHIP_CARRIZO:
1801 adev->gfx.config.max_shader_engines = 1;
1802 adev->gfx.config.max_tile_pipes = 2;
1803 adev->gfx.config.max_sh_per_se = 1;
1804 adev->gfx.config.max_backends_per_se = 2;
1805 adev->gfx.config.max_cu_per_sh = 8;
1806 adev->gfx.config.max_texture_channel_caches = 2;
1807 adev->gfx.config.max_gprs = 256;
1808 adev->gfx.config.max_gs_threads = 32;
1809 adev->gfx.config.max_hw_contexts = 8;
1811 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1816 break;
1817 case CHIP_STONEY:
1818 adev->gfx.config.max_shader_engines = 1;
1819 adev->gfx.config.max_tile_pipes = 2;
1820 adev->gfx.config.max_sh_per_se = 1;
1821 adev->gfx.config.max_backends_per_se = 1;
1822 adev->gfx.config.max_cu_per_sh = 3;
1823 adev->gfx.config.max_texture_channel_caches = 2;
1824 adev->gfx.config.max_gprs = 256;
1825 adev->gfx.config.max_gs_threads = 16;
1826 adev->gfx.config.max_hw_contexts = 8;
1828 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1833 break;
1834 default:
1835 adev->gfx.config.max_shader_engines = 2;
1836 adev->gfx.config.max_tile_pipes = 4;
1837 adev->gfx.config.max_cu_per_sh = 2;
1838 adev->gfx.config.max_sh_per_se = 1;
1839 adev->gfx.config.max_backends_per_se = 2;
1840 adev->gfx.config.max_texture_channel_caches = 4;
1841 adev->gfx.config.max_gprs = 256;
1842 adev->gfx.config.max_gs_threads = 32;
1843 adev->gfx.config.max_hw_contexts = 8;
1845 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1846 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1847 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1848 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1849 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1850 break;
1851 }
1853 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1854 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1855 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1857 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1858 adev->gfx.config.mem_max_burst_length_bytes = 256;
1859 if (adev->flags & AMD_IS_APU) {
1860 /* Get memory bank mapping mode. */
1861 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1862 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1863 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1865 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1866 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1867 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1869 /* Validate settings in case only one DIMM is installed. */
1870 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1871 dimm00_addr_map = 0;
1872 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1873 dimm01_addr_map = 0;
1874 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1875 dimm10_addr_map = 0;
1876 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1877 dimm11_addr_map = 0;
1879 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1880 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1881 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1882 adev->gfx.config.mem_row_size_in_kb = 2;
1883 else
1884 adev->gfx.config.mem_row_size_in_kb = 1;
1885 } else {
1886 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1887 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1888 if (adev->gfx.config.mem_row_size_in_kb > 4)
1889 adev->gfx.config.mem_row_size_in_kb = 4;
1890 }
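/*
 * dGPU row size above: 4 * 2^(8 + NOOFCOLS) bytes, i.e. 1KB for
 * NOOFCOLS = 0, doubling per column bit and capped at 4KB.
 */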
1892 adev->gfx.config.shader_engine_tile_size = 32;
1893 adev->gfx.config.num_gpus = 1;
1894 adev->gfx.config.multi_gpu_tile_size = 64;
1896 /* fix up row size */
1897 switch (adev->gfx.config.mem_row_size_in_kb) {
1898 case 1:
1899 default:
1900 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1901 break;
1902 case 2:
1903 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1904 break;
1905 case 4:
1906 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1907 break;
1908 }
1909 adev->gfx.config.gb_addr_config = gb_addr_config;
1911 return 0;
1912 }
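/**
 * gfx_v8_0_compute_ring_init - set up one MEC compute ring
 *
 * @adev: amdgpu_device pointer
 * @ring_id: global compute ring index
 * @mec: MEC (micro engine compute) the ring lives on
 * @pipe: pipe within that MEC
 * @queue: queue within that pipe
 *
 * Each ring gets its own doorbell and a GFX8_MEC_HPD_SIZE slice of the
 * EOP buffer, e.g. ring_id 3 uses doorbell AMDGPU_DOORBELL_MEC_RING0 + 3
 * and EOP offset 3 * 2048; the EOP interrupt is routed by me/pipe.
 */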
1914 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1915 int mec, int pipe, int queue)
1916 {
1917 int r;
1918 unsigned irq_type;
1919 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1923 /* mec0 is me1 */
1924 ring->me = mec + 1;
1925 ring->pipe = pipe;
1926 ring->queue = queue;
1928 ring->ring_obj = NULL;
1929 ring->use_doorbell = true;
1930 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1931 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1932 + (ring_id * GFX8_MEC_HPD_SIZE);
1933 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1935 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1936 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1937 + ring->pipe;
1939 /* type-2 packets are deprecated on MEC, use type-3 instead */
1940 r = amdgpu_ring_init(adev, ring, 1024,
1941 &adev->gfx.eop_irq, irq_type);
1942 if (r)
1943 return r;
1945 return 0;
1946 }
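/**
 * gfx_v8_0_sw_init - IP block sw init
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Registers the gfx interrupt sources, loads firmware, allocates the
 * RLC/MEC/KIQ objects and MQDs, initializes the gfx and compute rings
 * and reserves the GDS/GWS/OA partitions.
 */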
1949 static int gfx_v8_0_sw_init(void *handle)
1950 {
1951 int i, j, k, r, ring_id;
1952 struct amdgpu_ring *ring;
1953 struct amdgpu_kiq *kiq;
1954 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1956 switch (adev->asic_type) {
1957 case CHIP_FIJI:
1958 case CHIP_TONGA:
1959 case CHIP_POLARIS11:
1960 case CHIP_POLARIS12:
1961 case CHIP_POLARIS10:
1962 case CHIP_CARRIZO:
1963 adev->gfx.mec.num_mec = 2;
1964 break;
1965 case CHIP_TOPAZ:
1966 case CHIP_STONEY:
1967 default:
1968 adev->gfx.mec.num_mec = 1;
1969 break;
1970 }
1972 adev->gfx.mec.num_pipe_per_mec = 4;
1973 adev->gfx.mec.num_queue_per_pipe = 8;
1976 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
1977 if (r)
1978 return r;
1981 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
1982 if (r)
1983 return r;
1985 /* Privileged reg */
1986 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
1987 &adev->gfx.priv_reg_irq);
1988 if (r)
1989 return r;
1991 /* Privileged inst */
1992 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
1993 &adev->gfx.priv_inst_irq);
1994 if (r)
1995 return r;
1997 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1999 gfx_v8_0_scratch_init(adev);
2001 r = gfx_v8_0_init_microcode(adev);
2002 if (r) {
2003 DRM_ERROR("Failed to load gfx firmware!\n");
2004 return r;
2005 }
2007 r = gfx_v8_0_rlc_init(adev);
2008 if (r) {
2009 DRM_ERROR("Failed to init rlc BOs!\n");
2010 return r;
2011 }
2013 r = gfx_v8_0_mec_init(adev);
2014 if (r) {
2015 DRM_ERROR("Failed to init MEC BOs!\n");
2016 return r;
2017 }
2019 /* set up the gfx ring */
2020 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2021 ring = &adev->gfx.gfx_ring[i];
2022 ring->ring_obj = NULL;
2023 sprintf(ring->name, "gfx");
2024 /* no gfx doorbells on iceland */
2025 if (adev->asic_type != CHIP_TOPAZ) {
2026 ring->use_doorbell = true;
2027 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2028 }
2030 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2031 AMDGPU_CP_IRQ_GFX_EOP);
2032 if (r)
2033 return r;
2034 }
2037 /* set up the compute queues - allocate horizontally across pipes */
2038 ring_id = 0;
2039 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2040 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2041 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2042 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2043 continue;
2045 r = gfx_v8_0_compute_ring_init(adev,
2046 ring_id,
2047 i, k, j);
2048 if (r)
2049 return r;
2051 ring_id++;
2052 }
2053 }
2054 }
2056 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2057 if (r) {
2058 DRM_ERROR("Failed to init KIQ BOs!\n");
2059 return r;
2060 }
2062 kiq = &adev->gfx.kiq;
2063 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2064 if (r)
2065 return r;
2067 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2068 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2069 if (r)
2070 return r;
2072 /* reserve GDS, GWS and OA resource for gfx */
2073 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2074 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2075 &adev->gds.gds_gfx_bo, NULL, NULL);
2076 if (r)
2077 return r;
2079 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2080 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2081 &adev->gds.gws_gfx_bo, NULL, NULL);
2082 if (r)
2083 return r;
2085 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2086 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2087 &adev->gds.oa_gfx_bo, NULL, NULL);
2088 if (r)
2089 return r;
2091 adev->gfx.ce_ram_size = 0x8000;
2093 r = gfx_v8_0_gpu_early_init(adev);
2094 if (r)
2095 return r;
2097 return 0;
2098 }
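/**
 * gfx_v8_0_sw_fini - IP block sw fini
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Releases everything sw_init allocated, in reverse order: GDS/GWS/OA
 * buffers, rings, MQDs, KIQ, CSA, MEC, RLC and firmware.
 */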
2100 static int gfx_v8_0_sw_fini(void *handle)
2101 {
2102 int i;
2103 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2105 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2106 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2107 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2109 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2110 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2111 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2112 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2114 amdgpu_gfx_compute_mqd_sw_fini(adev);
2115 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2116 amdgpu_gfx_kiq_fini(adev);
2117 amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL);
2119 gfx_v8_0_mec_fini(adev);
2120 gfx_v8_0_rlc_fini(adev);
2121 gfx_v8_0_free_microcode(adev);
2123 return 0;
2124 }
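/**
 * gfx_v8_0_tiling_mode_table_init - program the tiling mode tables
 *
 * @adev: amdgpu_device pointer
 *
 * Builds the GB_TILE_MODE0-31 and GB_MACROTILE_MODE0-15 tables for the
 * current ASIC from the ARRAY_MODE/PIPE_CONFIG/... field macros defined
 * above and writes them out; index 7 (and a few more on Topaz) is left
 * unprogrammed as reserved.
 */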
2126 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2127 {
2128 uint32_t *modearray, *mod2array;
2129 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2130 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2131 u32 reg_offset;
2133 modearray = adev->gfx.config.tile_mode_array;
2134 mod2array = adev->gfx.config.macrotile_mode_array;
2136 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2137 modearray[reg_offset] = 0;
2139 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2140 mod2array[reg_offset] = 0;
2142 switch (adev->asic_type) {
2143 case CHIP_TOPAZ:
2144 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145 PIPE_CONFIG(ADDR_SURF_P2) |
2146 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2147 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2148 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2149 PIPE_CONFIG(ADDR_SURF_P2) |
2150 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2151 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2152 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153 PIPE_CONFIG(ADDR_SURF_P2) |
2154 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2155 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2156 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157 PIPE_CONFIG(ADDR_SURF_P2) |
2158 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2159 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161 PIPE_CONFIG(ADDR_SURF_P2) |
2162 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2163 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2165 PIPE_CONFIG(ADDR_SURF_P2) |
2166 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2167 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2168 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2169 PIPE_CONFIG(ADDR_SURF_P2) |
2170 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2171 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2172 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2173 PIPE_CONFIG(ADDR_SURF_P2));
2174 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2175 PIPE_CONFIG(ADDR_SURF_P2) |
2176 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2177 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2178 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2179 PIPE_CONFIG(ADDR_SURF_P2) |
2180 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2181 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2183 PIPE_CONFIG(ADDR_SURF_P2) |
2184 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2186 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2187 PIPE_CONFIG(ADDR_SURF_P2) |
2188 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2189 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2191 PIPE_CONFIG(ADDR_SURF_P2) |
2192 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2193 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2194 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2195 PIPE_CONFIG(ADDR_SURF_P2) |
2196 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2197 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2198 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2199 PIPE_CONFIG(ADDR_SURF_P2) |
2200 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2202 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2203 PIPE_CONFIG(ADDR_SURF_P2) |
2204 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2205 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2206 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2207 PIPE_CONFIG(ADDR_SURF_P2) |
2208 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2209 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2210 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2211 PIPE_CONFIG(ADDR_SURF_P2) |
2212 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2213 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2215 PIPE_CONFIG(ADDR_SURF_P2) |
2216 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2217 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2219 PIPE_CONFIG(ADDR_SURF_P2) |
2220 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2223 PIPE_CONFIG(ADDR_SURF_P2) |
2224 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2227 PIPE_CONFIG(ADDR_SURF_P2) |
2228 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2229 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2230 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2231 PIPE_CONFIG(ADDR_SURF_P2) |
2232 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2233 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2234 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2235 PIPE_CONFIG(ADDR_SURF_P2) |
2236 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2237 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2238 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2239 PIPE_CONFIG(ADDR_SURF_P2) |
2240 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2241 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2242 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2243 PIPE_CONFIG(ADDR_SURF_P2) |
2244 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2247 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2248 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2249 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2250 NUM_BANKS(ADDR_SURF_8_BANK));
2251 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2252 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2253 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2254 NUM_BANKS(ADDR_SURF_8_BANK));
2255 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 NUM_BANKS(ADDR_SURF_8_BANK));
2259 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2260 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2262 NUM_BANKS(ADDR_SURF_8_BANK));
2263 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2264 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2265 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2266 NUM_BANKS(ADDR_SURF_8_BANK));
2267 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2270 NUM_BANKS(ADDR_SURF_8_BANK));
2271 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2273 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2274 NUM_BANKS(ADDR_SURF_8_BANK));
2275 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2276 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2277 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2278 NUM_BANKS(ADDR_SURF_16_BANK));
2279 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282 NUM_BANKS(ADDR_SURF_16_BANK));
2283 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2284 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2285 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286 NUM_BANKS(ADDR_SURF_16_BANK));
2287 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2288 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2289 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290 NUM_BANKS(ADDR_SURF_16_BANK));
2291 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294 NUM_BANKS(ADDR_SURF_16_BANK));
2295 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2296 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2297 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2298 NUM_BANKS(ADDR_SURF_16_BANK));
2299 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2300 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2301 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2302 NUM_BANKS(ADDR_SURF_8_BANK));
2304 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2305 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2306 reg_offset != 23)
2307 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2309 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2310 if (reg_offset != 7)
2311 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2313 break;
2314 case CHIP_FIJI:
2315 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2318 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2319 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2322 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2326 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2330 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2334 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2336 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2338 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2340 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2342 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2344 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2345 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2348 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2349 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2350 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2353 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2354 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2357 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2358 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2361 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2362 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2363 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2365 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2366 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2369 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2374 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2378 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2381 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2382 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2383 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2385 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2386 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2389 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2390 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2392 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2394 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2398 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2402 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2406 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2407 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2410 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2412 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2414 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2416 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2418 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2422 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2424 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2426 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2428 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2430 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2433 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2434 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2435 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2438 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2441 NUM_BANKS(ADDR_SURF_8_BANK));
2442 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2444 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445 NUM_BANKS(ADDR_SURF_8_BANK));
2446 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449 NUM_BANKS(ADDR_SURF_8_BANK));
2450 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453 NUM_BANKS(ADDR_SURF_8_BANK));
2454 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2456 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2457 NUM_BANKS(ADDR_SURF_8_BANK));
2458 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2461 NUM_BANKS(ADDR_SURF_8_BANK));
2462 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465 NUM_BANKS(ADDR_SURF_8_BANK));
2466 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2468 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2469 NUM_BANKS(ADDR_SURF_8_BANK));
2470 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2472 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2473 NUM_BANKS(ADDR_SURF_8_BANK));
2474 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2476 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477 NUM_BANKS(ADDR_SURF_8_BANK));
2478 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2481 NUM_BANKS(ADDR_SURF_8_BANK));
2482 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2484 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2485 NUM_BANKS(ADDR_SURF_8_BANK));
2486 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489 NUM_BANKS(ADDR_SURF_8_BANK));
2490 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2493 NUM_BANKS(ADDR_SURF_4_BANK));
2495 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2496 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2498 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2499 if (reg_offset != 7)
2500 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2502 break;
2503 case CHIP_TONGA:
2504 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2507 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2508 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2510 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2511 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2512 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2515 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2516 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2519 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2520 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2523 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2525 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2527 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2528 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2529 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2531 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2534 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2535 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2537 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2538 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2539 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2541 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2542 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2547 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2549 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2550 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2551 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2552 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2553 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2554 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2555 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2558 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2559 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2562 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2563 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2566 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2567 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2570 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2572 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2574 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2575 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2578 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2579 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2581 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2582 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2583 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2585 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2587 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2589 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2590 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2591 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2593 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2594 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2595 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2596 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2597 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2598 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2599 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2601 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2602 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2603 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2605 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2606 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2607 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2609 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2610 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2611 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2613 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2614 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2617 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2618 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2619 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2621 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2622 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2623 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2624 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2625 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2627 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2629 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2630 NUM_BANKS(ADDR_SURF_16_BANK));
2631 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2633 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634 NUM_BANKS(ADDR_SURF_16_BANK));
2635 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638 NUM_BANKS(ADDR_SURF_16_BANK));
2639 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642 NUM_BANKS(ADDR_SURF_16_BANK));
2643 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2645 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646 NUM_BANKS(ADDR_SURF_16_BANK));
2647 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2650 NUM_BANKS(ADDR_SURF_16_BANK));
2651 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654 NUM_BANKS(ADDR_SURF_16_BANK));
2655 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2657 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2658 NUM_BANKS(ADDR_SURF_16_BANK));
2659 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2661 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2662 NUM_BANKS(ADDR_SURF_16_BANK));
2663 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2665 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2666 NUM_BANKS(ADDR_SURF_16_BANK));
2667 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2669 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2670 NUM_BANKS(ADDR_SURF_16_BANK));
2671 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2673 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2674 NUM_BANKS(ADDR_SURF_8_BANK));
2675 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2676 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2677 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2678 NUM_BANKS(ADDR_SURF_4_BANK));
2679 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2682 NUM_BANKS(ADDR_SURF_4_BANK));
2684 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2685 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2687 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2688 if (reg_offset != 7)
2689 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2691 break;
2692 case CHIP_POLARIS11:
2693 case CHIP_POLARIS12:
2694 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2697 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2698 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2701 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2702 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2703 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2705 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2706 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2707 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2709 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2710 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2713 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2714 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2715 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2717 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2718 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2719 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2721 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2722 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2725 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2726 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2727 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2728 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2729 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2731 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2732 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2735 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2737 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2738 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2739 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2740 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2741 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2743 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2744 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2747 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2748 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2751 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2752 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2753 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2754 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2755 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2756 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2757 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2759 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2760 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2761 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2762 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2763 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2764 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2765 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2767 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2768 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2769 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2771 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2772 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2773 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2775 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2776 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2777 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2779 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2780 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2781 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2783 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2784 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2785 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2787 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2788 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2789 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2791 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2792 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2793 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2795 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2796 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2797 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2798 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2799 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2800 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2802 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2803 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2804 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2805 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2807 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2808 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2809 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2810 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2811 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2812 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2813 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2815 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2817 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820 NUM_BANKS(ADDR_SURF_16_BANK));
2822 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2824 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2825 NUM_BANKS(ADDR_SURF_16_BANK));
2827 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2828 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2829 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830 NUM_BANKS(ADDR_SURF_16_BANK));
2832 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2834 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835 NUM_BANKS(ADDR_SURF_16_BANK));
2837 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840 NUM_BANKS(ADDR_SURF_16_BANK));
2842 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2844 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2845 NUM_BANKS(ADDR_SURF_16_BANK));
2847 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2850 NUM_BANKS(ADDR_SURF_16_BANK));
2852 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2853 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2854 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2855 NUM_BANKS(ADDR_SURF_16_BANK));
2857 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2858 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2859 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860 NUM_BANKS(ADDR_SURF_16_BANK));
2862 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2863 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2864 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2865 NUM_BANKS(ADDR_SURF_16_BANK));
2867 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2868 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2869 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2870 NUM_BANKS(ADDR_SURF_16_BANK));
2872 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2874 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2875 NUM_BANKS(ADDR_SURF_16_BANK));
2877 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2878 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2879 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2880 NUM_BANKS(ADDR_SURF_8_BANK));
2882 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2884 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2885 NUM_BANKS(ADDR_SURF_4_BANK));
2887 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2888 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2890 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2891 if (reg_offset != 7)
2892 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2894 break;
2895 case CHIP_POLARIS10:
2896 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2899 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2900 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2903 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2907 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2908 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2911 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2915 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2916 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2919 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2920 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2923 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2927 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2929 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2930 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2931 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2932 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2933 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2934 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2935 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2936 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2940 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2942 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2943 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2944 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2945 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2946 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2947 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2948 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2950 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2951 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2955 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2959 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2962 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2964 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2966 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2967 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2968 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2969 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2970 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2971 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2973 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2974 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2975 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2977 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2978 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2979 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2981 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2982 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2983 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2985 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2986 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2987 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2988 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2989 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2990 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2991 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2992 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2993 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2994 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2995 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2996 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2997 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2998 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2999 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3000 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3001 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3002 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3004 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3005 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3006 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3007 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3008 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3011 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3012 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3013 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3014 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3015 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3016 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3017 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3019 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3021 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022 NUM_BANKS(ADDR_SURF_16_BANK));
3024 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3026 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027 NUM_BANKS(ADDR_SURF_16_BANK));
3029 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3031 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032 NUM_BANKS(ADDR_SURF_16_BANK));
3034 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037 NUM_BANKS(ADDR_SURF_16_BANK));
3039 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3041 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042 NUM_BANKS(ADDR_SURF_16_BANK));
3044 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3046 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3047 NUM_BANKS(ADDR_SURF_16_BANK));
3049 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052 NUM_BANKS(ADDR_SURF_16_BANK));
3054 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3056 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3057 NUM_BANKS(ADDR_SURF_16_BANK));
3059 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3061 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3062 NUM_BANKS(ADDR_SURF_16_BANK));
3064 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3065 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3066 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3067 NUM_BANKS(ADDR_SURF_16_BANK));
3069 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3071 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3072 NUM_BANKS(ADDR_SURF_16_BANK));
3074 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3077 NUM_BANKS(ADDR_SURF_8_BANK));
3079 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3080 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3081 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3082 NUM_BANKS(ADDR_SURF_4_BANK));
3084 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3087 NUM_BANKS(ADDR_SURF_4_BANK));
3089 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3090 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3092 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3093 if (reg_offset != 7)
3094 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3095 break;
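/*
 * Macrotile index 7 is skipped in the loop above for every ASIC variant
 * in this function: no mod2array[7] entry is ever programmed, so
 * GB_MACROTILE_MODE7 keeps its hardware default value.
 */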
3097 case CHIP_STONEY:
3098 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3099 PIPE_CONFIG(ADDR_SURF_P2) |
3100 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3101 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3102 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3103 PIPE_CONFIG(ADDR_SURF_P2) |
3104 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3105 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3106 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107 PIPE_CONFIG(ADDR_SURF_P2) |
3108 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3109 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3110 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3111 PIPE_CONFIG(ADDR_SURF_P2) |
3112 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3113 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3114 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115 PIPE_CONFIG(ADDR_SURF_P2) |
3116 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3117 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3118 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3119 PIPE_CONFIG(ADDR_SURF_P2) |
3120 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3121 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3122 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3123 PIPE_CONFIG(ADDR_SURF_P2) |
3124 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3125 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3126 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3127 PIPE_CONFIG(ADDR_SURF_P2));
3128 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3129 PIPE_CONFIG(ADDR_SURF_P2) |
3130 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3131 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3132 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3133 PIPE_CONFIG(ADDR_SURF_P2) |
3134 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3135 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3136 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3137 PIPE_CONFIG(ADDR_SURF_P2) |
3138 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3140 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3141 PIPE_CONFIG(ADDR_SURF_P2) |
3142 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3143 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3144 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3145 PIPE_CONFIG(ADDR_SURF_P2) |
3146 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3147 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3148 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3149 PIPE_CONFIG(ADDR_SURF_P2) |
3150 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3151 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3152 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3153 PIPE_CONFIG(ADDR_SURF_P2) |
3154 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3156 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3157 PIPE_CONFIG(ADDR_SURF_P2) |
3158 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3159 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3160 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3161 PIPE_CONFIG(ADDR_SURF_P2) |
3162 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3163 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3164 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3165 PIPE_CONFIG(ADDR_SURF_P2) |
3166 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3167 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3168 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3169 PIPE_CONFIG(ADDR_SURF_P2) |
3170 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3172 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3173 PIPE_CONFIG(ADDR_SURF_P2) |
3174 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3175 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3176 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3177 PIPE_CONFIG(ADDR_SURF_P2) |
3178 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3179 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3180 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3181 PIPE_CONFIG(ADDR_SURF_P2) |
3182 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3184 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3185 PIPE_CONFIG(ADDR_SURF_P2) |
3186 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3187 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3188 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3189 PIPE_CONFIG(ADDR_SURF_P2) |
3190 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3191 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3192 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3193 PIPE_CONFIG(ADDR_SURF_P2) |
3194 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3195 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3197 PIPE_CONFIG(ADDR_SURF_P2) |
3198 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3199 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3201 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3202 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3203 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3204 NUM_BANKS(ADDR_SURF_8_BANK));
3205 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3206 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3207 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3208 NUM_BANKS(ADDR_SURF_8_BANK));
3209 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3210 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3211 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3212 NUM_BANKS(ADDR_SURF_8_BANK));
3213 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3214 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3215 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3216 NUM_BANKS(ADDR_SURF_8_BANK));
3217 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3218 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3219 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3220 NUM_BANKS(ADDR_SURF_8_BANK));
3221 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3223 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3224 NUM_BANKS(ADDR_SURF_8_BANK));
3225 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3226 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3227 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3228 NUM_BANKS(ADDR_SURF_8_BANK));
3229 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3230 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3231 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3232 NUM_BANKS(ADDR_SURF_16_BANK));
3233 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3234 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3235 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3236 NUM_BANKS(ADDR_SURF_16_BANK));
3237 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3238 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3239 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240 NUM_BANKS(ADDR_SURF_16_BANK));
3241 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3242 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3243 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3244 NUM_BANKS(ADDR_SURF_16_BANK));
3245 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3246 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3247 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248 NUM_BANKS(ADDR_SURF_16_BANK));
3249 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3251 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3252 NUM_BANKS(ADDR_SURF_16_BANK));
3253 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3255 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3256 NUM_BANKS(ADDR_SURF_8_BANK));
3258 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3259 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3260 reg_offset != 23)
3261 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3263 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3264 if (reg_offset != 7)
3265 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3270 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3274 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3275 PIPE_CONFIG(ADDR_SURF_P2) |
3276 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3277 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3278 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3279 PIPE_CONFIG(ADDR_SURF_P2) |
3280 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3281 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3282 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283 PIPE_CONFIG(ADDR_SURF_P2) |
3284 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3285 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3286 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3287 PIPE_CONFIG(ADDR_SURF_P2) |
3288 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3289 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3290 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3291 PIPE_CONFIG(ADDR_SURF_P2) |
3292 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3293 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3294 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3295 PIPE_CONFIG(ADDR_SURF_P2) |
3296 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3297 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3298 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3299 PIPE_CONFIG(ADDR_SURF_P2) |
3300 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3301 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3302 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3303 PIPE_CONFIG(ADDR_SURF_P2));
3304 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3305 PIPE_CONFIG(ADDR_SURF_P2) |
3306 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3308 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3309 PIPE_CONFIG(ADDR_SURF_P2) |
3310 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3311 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3312 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3313 PIPE_CONFIG(ADDR_SURF_P2) |
3314 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3315 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3316 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3317 PIPE_CONFIG(ADDR_SURF_P2) |
3318 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3320 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3321 PIPE_CONFIG(ADDR_SURF_P2) |
3322 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3324 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3325 PIPE_CONFIG(ADDR_SURF_P2) |
3326 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3327 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3328 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3329 PIPE_CONFIG(ADDR_SURF_P2) |
3330 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3332 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3333 PIPE_CONFIG(ADDR_SURF_P2) |
3334 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3335 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3336 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3337 PIPE_CONFIG(ADDR_SURF_P2) |
3338 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3339 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3341 PIPE_CONFIG(ADDR_SURF_P2) |
3342 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3344 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3345 PIPE_CONFIG(ADDR_SURF_P2) |
3346 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3347 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3348 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3349 PIPE_CONFIG(ADDR_SURF_P2) |
3350 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3352 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3353 PIPE_CONFIG(ADDR_SURF_P2) |
3354 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3355 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3356 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3357 PIPE_CONFIG(ADDR_SURF_P2) |
3358 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3359 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3360 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3361 PIPE_CONFIG(ADDR_SURF_P2) |
3362 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3364 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3365 PIPE_CONFIG(ADDR_SURF_P2) |
3366 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3368 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3369 PIPE_CONFIG(ADDR_SURF_P2) |
3370 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3371 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3372 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3373 PIPE_CONFIG(ADDR_SURF_P2) |
3374 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3377 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3378 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3379 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3380 NUM_BANKS(ADDR_SURF_8_BANK));
3381 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3382 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3383 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384 NUM_BANKS(ADDR_SURF_8_BANK));
3385 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3387 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3388 NUM_BANKS(ADDR_SURF_8_BANK));
3389 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3391 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3392 NUM_BANKS(ADDR_SURF_8_BANK));
3393 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3396 NUM_BANKS(ADDR_SURF_8_BANK));
3397 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3399 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3400 NUM_BANKS(ADDR_SURF_8_BANK));
3401 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3404 NUM_BANKS(ADDR_SURF_8_BANK));
3405 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3406 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3407 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3408 NUM_BANKS(ADDR_SURF_16_BANK));
3409 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3412 NUM_BANKS(ADDR_SURF_16_BANK));
3413 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3414 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3415 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416 NUM_BANKS(ADDR_SURF_16_BANK));
3417 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3418 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3419 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3420 NUM_BANKS(ADDR_SURF_16_BANK));
3421 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3424 NUM_BANKS(ADDR_SURF_16_BANK));
3425 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3426 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3427 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3428 NUM_BANKS(ADDR_SURF_16_BANK));
3429 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3430 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3431 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3432 NUM_BANKS(ADDR_SURF_8_BANK));
3434 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3435 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3436 reg_offset != 23)
3437 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3439 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3440 if (reg_offset != 7)
3441 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3442 break;
3443 }
3444 }
3447 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3448 u32 se_num, u32 sh_num, u32 instance)
3449 {
3450 u32 data;
3452 if (instance == 0xffffffff)
3453 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3454 else
3455 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3457 if (se_num == 0xffffffff)
3458 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3459 else
3460 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3462 if (sh_num == 0xffffffff)
3463 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3464 else
3465 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3467 WREG32(mmGRBM_GFX_INDEX, data);
3468 }
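/*
 * GRBM_GFX_INDEX steers all subsequent GRBM register accesses to the
 * selected SE/SH/instance. Passing 0xffffffff for a field sets the
 * matching *_BROADCAST_WRITES bit instead of an index, e.g.
 *
 *   gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 *
 * broadcasts the writes that follow to every shader engine and array.
 * Callers below serialize select/access/restore sequences with
 * adev->grbm_idx_mutex.
 */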
3470 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3471 {
3472 u32 data, mask;
3474 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3475 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3477 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3479 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3480 adev->gfx.config.max_sh_per_se);
3482 return (~data) & mask;
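/*
 * Worked example (illustrative numbers, not a specific ASIC): with
 * max_backends_per_se = 4 and max_sh_per_se = 1 the mask covers four
 * RBs (0xf). A combined disable bitmap of 0b0010 then yields
 * (~0b0010) & 0xf = 0b1101, i.e. RB1 has been harvested.
 */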
3485 static void
3486 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3487 {
3488 switch (adev->asic_type) {
3489 case CHIP_FIJI:
3490 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3491 RB_XSEL2(1) | PKR_MAP(2) |
3492 PKR_XSEL(1) | PKR_YSEL(1) |
3493 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3494 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3495 SE_PAIR_YSEL(2);
3496 break;
3497 case CHIP_TONGA:
3498 case CHIP_POLARIS10:
3499 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3500 SE_XSEL(1) | SE_YSEL(1);
3501 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3502 SE_PAIR_YSEL(2);
3503 break;
3504 case CHIP_TOPAZ:
3505 case CHIP_CARRIZO:
3506 *rconf |= RB_MAP_PKR0(2);
3507 *rconf1 |= 0x0;
3508 break;
3509 case CHIP_POLARIS11:
3510 case CHIP_POLARIS12:
3511 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3512 SE_XSEL(1) | SE_YSEL(1);
3513 *rconf1 |= 0x0;
3514 break;
3515 case CHIP_STONEY:
3516 *rconf |= 0x0;
3517 *rconf1 |= 0x0;
3518 break;
3519 default:
3520 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3521 break;
3522 }
3523 }
3525 static void
3526 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3527 u32 raster_config, u32 raster_config_1,
3528 unsigned rb_mask, unsigned num_rb)
3529 {
3530 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3531 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3532 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3533 unsigned rb_per_se = num_rb / num_se;
3534 unsigned se_mask[4];
3535 unsigned se;
3537 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3538 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3539 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3540 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3542 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3543 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3544 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3546 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3547 (!se_mask[2] && !se_mask[3]))) {
3548 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3550 if (!se_mask[0] && !se_mask[1]) {
3551 raster_config_1 |=
3552 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3553 } else {
3554 raster_config_1 |=
3555 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3556 }
3557 }
3559 for (se = 0; se < num_se; se++) {
3560 unsigned raster_config_se = raster_config;
3561 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3562 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3563 int idx = (se / 2) * 2;
3565 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3566 raster_config_se &= ~SE_MAP_MASK;
3568 if (!se_mask[idx]) {
3569 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3570 } else {
3571 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3572 }
3573 }
3575 pkr0_mask &= rb_mask;
3576 pkr1_mask &= rb_mask;
3577 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3578 raster_config_se &= ~PKR_MAP_MASK;
3581 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3583 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3587 if (rb_per_se >= 2) {
3588 unsigned rb0_mask = 1 << (se * rb_per_se);
3589 unsigned rb1_mask = rb0_mask << 1;
3591 rb0_mask &= rb_mask;
3592 rb1_mask &= rb_mask;
3593 if (!rb0_mask || !rb1_mask) {
3594 raster_config_se &= ~RB_MAP_PKR0_MASK;
3596 if (!rb0_mask) {
3597 raster_config_se |=
3598 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3599 } else {
3600 raster_config_se |=
3601 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3602 }
3603 }
3605 if (rb_per_se > 2) {
3606 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3607 rb1_mask = rb0_mask << 1;
3608 rb0_mask &= rb_mask;
3609 rb1_mask &= rb_mask;
3610 if (!rb0_mask || !rb1_mask) {
3611 raster_config_se &= ~RB_MAP_PKR1_MASK;
3613 if (!rb0_mask) {
3614 raster_config_se |=
3615 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3616 } else {
3617 raster_config_se |=
3618 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3619 }
3620 }
3621 }
3622 }
3624 /* GRBM_GFX_INDEX has a different offset on VI */
3625 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3626 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3627 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3628 }
3630 /* GRBM_GFX_INDEX has a different offset on VI */
3631 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3632 }
3634 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3635 {
3636 int i, j;
3637 u32 data;
3638 u32 raster_config = 0, raster_config_1 = 0;
3639 u32 active_rbs = 0;
3640 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3641 adev->gfx.config.max_sh_per_se;
3642 unsigned num_rb_pipes;
3644 mutex_lock(&adev->grbm_idx_mutex);
3645 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3646 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3647 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3648 data = gfx_v8_0_get_rb_active_bitmap(adev);
3649 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3650 rb_bitmap_width_per_sh);
3651 }
3652 }
3653 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3655 adev->gfx.config.backend_enable_mask = active_rbs;
3656 adev->gfx.config.num_rbs = hweight32(active_rbs);
3658 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3659 adev->gfx.config.max_shader_engines, 16);
3661 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3663 if (!adev->gfx.config.backend_enable_mask ||
3664 adev->gfx.config.num_rbs >= num_rb_pipes) {
3665 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3666 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3667 } else {
3668 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3669 adev->gfx.config.backend_enable_mask,
3670 num_rb_pipes);
3671 }
3673 /* cache the values for userspace */
3674 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3675 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3676 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3677 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3678 RREG32(mmCC_RB_BACKEND_DISABLE);
3679 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3680 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3681 adev->gfx.config.rb_config[i][j].raster_config =
3682 RREG32(mmPA_SC_RASTER_CONFIG);
3683 adev->gfx.config.rb_config[i][j].raster_config_1 =
3684 RREG32(mmPA_SC_RASTER_CONFIG_1);
3685 }
3686 }
3687 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3688 mutex_unlock(&adev->grbm_idx_mutex);
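/*
 * The cached rb_config[i][j] values are not re-read by this file; per
 * the "cache the values for userspace" note above, the likely consumer
 * is a userspace query path (e.g. the amdgpu INFO ioctl) that can then
 * report the post-harvest raster configuration without re-selecting
 * GRBM indices.
 */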
3691 /**
3692 * gfx_v8_0_init_compute_vmid - initialize SH_MEM registers for compute VMIDs
3694 * @adev: amdgpu_device pointer
3696 * Initialize compute vmid sh_mem registers
3698 */
3699 #define DEFAULT_SH_MEM_BASES (0x6000)
3700 #define FIRST_COMPUTE_VMID (8)
3701 #define LAST_COMPUTE_VMID (16)
3702 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3703 {
3704 int i;
3705 uint32_t sh_mem_config;
3706 uint32_t sh_mem_bases;
3708 /*
3709 * Configure apertures:
3710 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3711 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3712 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3713 */
3714 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3716 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3717 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3718 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3719 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3720 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3721 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3723 mutex_lock(&adev->srbm_mutex);
3724 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3725 vi_srbm_select(adev, 0, 0, 0, i);
3726 /* CP and shaders */
3727 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3728 WREG32(mmSH_MEM_APE1_BASE, 1);
3729 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3730 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3731 }
3732 vi_srbm_select(adev, 0, 0, 0, 0);
3733 mutex_unlock(&adev->srbm_mutex);
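/*
 * Worked value: with DEFAULT_SH_MEM_BASES = 0x6000 the register is
 * 0x6000 | (0x6000 << 16) = 0x60006000. Each 16-bit base field supplies
 * the top bits of its aperture address, which is how the
 * 0x60000000'00000000-based LDS/scratch/GPUVM ranges in the comment
 * above are formed.
 */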
3736 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3737 {
3738 switch (adev->asic_type) {
3739 default:
3740 adev->gfx.config.double_offchip_lds_buf = 1;
3741 break;
3742 case CHIP_CARRIZO:
3743 case CHIP_STONEY:
3744 adev->gfx.config.double_offchip_lds_buf = 0;
3745 break;
3746 }
3747 }
3749 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3750 {
3751 u32 tmp, sh_static_mem_cfg;
3752 int i;
3754 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3755 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3756 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3757 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3759 gfx_v8_0_tiling_mode_table_init(adev);
3760 gfx_v8_0_setup_rb(adev);
3761 gfx_v8_0_get_cu_info(adev);
3762 gfx_v8_0_config_init(adev);
3764 /* XXX SH_MEM regs */
3765 /* where to put LDS, scratch, GPUVM in FSA64 space */
3766 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3767 SWIZZLE_ENABLE, 1);
3768 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3769 ELEMENT_SIZE, 1);
3770 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3771 INDEX_STRIDE, 3);
3772 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3774 mutex_lock(&adev->srbm_mutex);
3775 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3776 vi_srbm_select(adev, 0, 0, 0, i);
3777 /* CP and shaders */
3778 if (i == 0) {
3779 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3780 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3781 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3782 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3783 WREG32(mmSH_MEM_CONFIG, tmp);
3784 WREG32(mmSH_MEM_BASES, 0);
3785 } else {
3786 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3787 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3788 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3789 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3790 WREG32(mmSH_MEM_CONFIG, tmp);
3791 tmp = adev->mc.shared_aperture_start >> 48;
3792 WREG32(mmSH_MEM_BASES, tmp);
3793 }
3795 WREG32(mmSH_MEM_APE1_BASE, 1);
3796 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3797 }
3798 vi_srbm_select(adev, 0, 0, 0, 0);
3799 mutex_unlock(&adev->srbm_mutex);
3801 gfx_v8_0_init_compute_vmid(adev);
3803 mutex_lock(&adev->grbm_idx_mutex);
3804 /*
3805 * making sure that the following register writes will be broadcast
3806 * to all the shaders
3807 */
3808 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3810 WREG32(mmPA_SC_FIFO_SIZE,
3811 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3812 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3813 (adev->gfx.config.sc_prim_fifo_size_backend <<
3814 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3815 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3816 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3817 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3818 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3820 tmp = RREG32(mmSPI_ARB_PRIORITY);
3821 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3822 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3823 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3824 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3825 WREG32(mmSPI_ARB_PRIORITY, tmp);
3827 mutex_unlock(&adev->grbm_idx_mutex);
3831 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3832 {
3833 u32 i, j, k;
3834 u32 mask;
3836 mutex_lock(&adev->grbm_idx_mutex);
3837 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3838 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3839 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3840 for (k = 0; k < adev->usec_timeout; k++) {
3841 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3842 break;
3843 udelay(1);
3844 }
3845 }
3846 }
3847 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3848 mutex_unlock(&adev->grbm_idx_mutex);
3850 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3851 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3852 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3853 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3854 for (k = 0; k < adev->usec_timeout; k++) {
3855 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3856 break;
3857 udelay(1);
3858 }
3859 }
3861 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3862 bool enable)
3863 {
3864 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3866 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3867 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3868 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3869 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3871 WREG32(mmCP_INT_CNTL_RING0, tmp);
3872 }
3874 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3875 {
3876 /* csib */
3877 WREG32(mmRLC_CSIB_ADDR_HI,
3878 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3879 WREG32(mmRLC_CSIB_ADDR_LO,
3880 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3881 WREG32(mmRLC_CSIB_LENGTH,
3882 adev->gfx.rlc.clear_state_size);
3885 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3886 int ind_offset,
3887 int list_size,
3888 int *unique_indices,
3889 int *indices_count,
3890 int max_indices,
3891 int *ind_start_offsets,
3892 int *offset_count,
3893 int max_offset)
3894 {
3895 int indices;
3896 bool new_entry = true;
3898 for (; ind_offset < list_size; ind_offset++) {
3902 ind_start_offsets[*offset_count] = ind_offset;
3903 *offset_count = *offset_count + 1;
3904 BUG_ON(*offset_count >= max_offset);
3907 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3914 /* look for the matching indice */
3916 indices < *indices_count;
3918 if (unique_indices[indices] ==
3919 register_list_format[ind_offset])
3923 if (indices >= *indices_count) {
3924 unique_indices[*indices_count] =
3925 register_list_format[ind_offset];
3926 indices = *indices_count;
3927 *indices_count = *indices_count + 1;
3928 BUG_ON(*indices_count >= max_indices);
3931 register_list_format[ind_offset] = indices;
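/*
 * On return: unique_indices[] holds the distinct index values found in
 * indirect entries, ind_start_offsets[] records where each indirect
 * sub-list begins, and the matched register_list_format[] entries have
 * been rewritten to reference slots of unique_indices[], which the
 * caller programs into the RLC_SRM_INDEX_CNTL_{ADDR,DATA} registers.
 */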
3935 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3936 {
3937 u32 i, temp, data;
3938 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3939 int indices_count = 0;
3940 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3941 int offset_count = 0;
3943 int list_size;
3944 unsigned int *register_list_format =
3945 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3946 if (!register_list_format)
3947 return -ENOMEM;
3948 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3949 adev->gfx.rlc.reg_list_format_size_bytes);
3951 gfx_v8_0_parse_ind_reg_list(register_list_format,
3952 RLC_FormatDirectRegListLength,
3953 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3956 ARRAY_SIZE(unique_indices),
3957 indirect_start_offsets,
3959 ARRAY_SIZE(indirect_start_offsets));
3961 /* save and restore list */
3962 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3964 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3965 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3966 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3969 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3970 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3971 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3973 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3974 list_size = list_size >> 1;
3975 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3976 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3978 /* starting offsets starts */
3979 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3980 adev->gfx.rlc.starting_offsets_start);
3981 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3982 WREG32(mmRLC_GPM_SCRATCH_DATA,
3983 indirect_start_offsets[i]);
3985 /* unique indices */
3986 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3987 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3988 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3989 if (unique_indices[i] != 0) {
3990 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3991 WREG32(data + i, unique_indices[i] >> 20);
3994 kfree(register_list_format);
3999 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4000 {
4001 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4002 }
4004 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4005 {
4006 uint32_t data;
4008 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4010 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4011 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4012 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4013 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4014 WREG32(mmRLC_PG_DELAY, data);
4016 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4017 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4018 }
4021 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4022 bool enable)
4023 {
4024 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4025 }
4027 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4028 bool enable)
4029 {
4030 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4031 }
4033 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4034 {
4035 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4036 }
4038 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4039 {
4040 if ((adev->asic_type == CHIP_CARRIZO) ||
4041 (adev->asic_type == CHIP_STONEY)) {
4042 gfx_v8_0_init_csb(adev);
4043 gfx_v8_0_init_save_restore_list(adev);
4044 gfx_v8_0_enable_save_restore_machine(adev);
4045 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4046 gfx_v8_0_init_power_gating(adev);
4047 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4048 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4049 (adev->asic_type == CHIP_POLARIS12)) {
4050 gfx_v8_0_init_csb(adev);
4051 gfx_v8_0_init_save_restore_list(adev);
4052 gfx_v8_0_enable_save_restore_machine(adev);
4053 gfx_v8_0_init_power_gating(adev);
4054 }
4055 }
4058 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4059 {
4060 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4062 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4063 gfx_v8_0_wait_for_rlc_serdes(adev);
4064 }
4066 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4067 {
4068 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4069 udelay(50);
4071 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4072 udelay(50);
4073 }
4075 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4076 {
4077 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4079 /* carrizo enables the cp interrupt only after cp init */
4080 if (!(adev->flags & AMD_IS_APU))
4081 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4082 }
4086 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4087 {
4088 const struct rlc_firmware_header_v2_0 *hdr;
4089 const __le32 *fw_data;
4090 unsigned i, fw_size;
4092 if (!adev->gfx.rlc_fw)
4095 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4096 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4098 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4099 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4100 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4102 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4103 for (i = 0; i < fw_size; i++)
4104 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4105 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
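/*
 * This is the common VI microcode upload pattern: writing 0 to the
 * ADDR register resets an auto-incrementing write pointer, each DATA
 * write stores one dword, and the final ADDR write of the firmware
 * version leaves a marker for the firmware. The PFP/CE/ME and MEC
 * loaders below follow the same sequence.
 */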
4110 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4111 {
4112 int r;
4113 u32 tmp;
4115 gfx_v8_0_rlc_stop(adev);
4118 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4119 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4120 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4121 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4122 if (adev->asic_type == CHIP_POLARIS11 ||
4123 adev->asic_type == CHIP_POLARIS10 ||
4124 adev->asic_type == CHIP_POLARIS12) {
4125 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4126 tmp &= ~0x3;
4127 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4128 }
4131 WREG32(mmRLC_PG_CNTL, 0);
4133 gfx_v8_0_rlc_reset(adev);
4134 gfx_v8_0_init_pg(adev);
4137 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4138 /* legacy rlc firmware loading */
4139 r = gfx_v8_0_rlc_load_microcode(adev);
4144 gfx_v8_0_rlc_start(adev);
4149 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4150 {
4151 int i;
4152 u32 tmp = RREG32(mmCP_ME_CNTL);
4154 if (enable) {
4155 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4156 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4157 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4158 } else {
4159 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4160 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4161 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4162 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4163 adev->gfx.gfx_ring[i].ready = false;
4165 WREG32(mmCP_ME_CNTL, tmp);
4169 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4170 {
4171 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4172 const struct gfx_firmware_header_v1_0 *ce_hdr;
4173 const struct gfx_firmware_header_v1_0 *me_hdr;
4174 const __le32 *fw_data;
4175 unsigned i, fw_size;
4177 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4180 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4181 adev->gfx.pfp_fw->data;
4182 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4183 adev->gfx.ce_fw->data;
4184 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4185 adev->gfx.me_fw->data;
4187 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4188 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4189 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4191 gfx_v8_0_cp_gfx_enable(adev, false);
4194 fw_data = (const __le32 *)
4195 (adev->gfx.pfp_fw->data +
4196 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4197 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4198 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4199 for (i = 0; i < fw_size; i++)
4200 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4201 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4204 fw_data = (const __le32 *)
4205 (adev->gfx.ce_fw->data +
4206 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4207 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4208 WREG32(mmCP_CE_UCODE_ADDR, 0);
4209 for (i = 0; i < fw_size; i++)
4210 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4211 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4214 fw_data = (const __le32 *)
4215 (adev->gfx.me_fw->data +
4216 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4217 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4218 WREG32(mmCP_ME_RAM_WADDR, 0);
4219 for (i = 0; i < fw_size; i++)
4220 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4221 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4226 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4227 {
4228 u32 count = 0;
4229 const struct cs_section_def *sect = NULL;
4230 const struct cs_extent_def *ext = NULL;
4232 /* begin clear state */
4233 count += 2;
4234 /* context control state */
4235 count += 3;
4237 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4238 for (ext = sect->section; ext->extent != NULL; ++ext) {
4239 if (sect->id == SECT_CONTEXT)
4240 count += 2 + ext->reg_count;
4245 /* pa_sc_raster_config/pa_sc_raster_config1 */
4247 /* end clear state */
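/*
 * This count must mirror the packets emitted by gfx_v8_0_cp_gfx_start()
 * below: 2 (begin clear state) + 3 (context control) + (2 + reg_count)
 * per SECT_CONTEXT extent + 4 (the two raster-config context registers)
 * + 2 (end clear state) + 2 (CLEAR_STATE). cp_gfx_start() then allocates
 * this size plus 4 extra dwords for its trailing SET_BASE packet.
 */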
4255 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4256 {
4257 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4258 const struct cs_section_def *sect = NULL;
4259 const struct cs_extent_def *ext = NULL;
4260 int r, i;
4263 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4264 WREG32(mmCP_ENDIAN_SWAP, 0);
4265 WREG32(mmCP_DEVICE_ID, 1);
4267 gfx_v8_0_cp_gfx_enable(adev, true);
4269 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4270 if (r) {
4271 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4272 return r;
4273 }
4275 /* clear state buffer */
4276 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4277 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4279 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4280 amdgpu_ring_write(ring, 0x80000000);
4281 amdgpu_ring_write(ring, 0x80000000);
4283 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4284 for (ext = sect->section; ext->extent != NULL; ++ext) {
4285 if (sect->id == SECT_CONTEXT) {
4286 amdgpu_ring_write(ring,
4287 PACKET3(PACKET3_SET_CONTEXT_REG,
4288 ext->reg_count));
4289 amdgpu_ring_write(ring,
4290 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4291 for (i = 0; i < ext->reg_count; i++)
4292 amdgpu_ring_write(ring, ext->extent[i]);
4297 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4298 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4299 switch (adev->asic_type) {
4301 case CHIP_POLARIS10:
4302 amdgpu_ring_write(ring, 0x16000012);
4303 amdgpu_ring_write(ring, 0x0000002A);
4305 case CHIP_POLARIS11:
4306 case CHIP_POLARIS12:
4307 amdgpu_ring_write(ring, 0x16000012);
4308 amdgpu_ring_write(ring, 0x00000000);
4311 amdgpu_ring_write(ring, 0x3a00161a);
4312 amdgpu_ring_write(ring, 0x0000002e);
4315 amdgpu_ring_write(ring, 0x00000002);
4316 amdgpu_ring_write(ring, 0x00000000);
4319 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4320 0x00000000 : 0x00000002);
4321 amdgpu_ring_write(ring, 0x00000000);
4324 amdgpu_ring_write(ring, 0x00000000);
4325 amdgpu_ring_write(ring, 0x00000000);
4331 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4332 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4334 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4335 amdgpu_ring_write(ring, 0);
4337 /* init the CE partitions */
4338 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4339 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4340 amdgpu_ring_write(ring, 0x8000);
4341 amdgpu_ring_write(ring, 0x8000);
4343 amdgpu_ring_commit(ring);
4345 return 0;
4346 }
4347 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4348 {
4349 u32 tmp;
4350 /* no gfx doorbells on iceland */
4351 if (adev->asic_type == CHIP_TOPAZ)
4352 return;
4354 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4356 if (ring->use_doorbell) {
4357 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4358 DOORBELL_OFFSET, ring->doorbell_index);
4359 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4360 DOORBELL_HIT, 0);
4361 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4362 DOORBELL_EN, 1);
4363 } else {
4364 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4365 }
4367 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4369 if (adev->flags & AMD_IS_APU)
4372 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4373 DOORBELL_RANGE_LOWER,
4374 AMDGPU_DOORBELL_GFX_RING0);
4375 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4377 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4378 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4379 }
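/*
 * The DOORBELL_RANGE registers bracket which doorbell offsets the CP
 * treats as gfx ring doorbells on dGPUs; APUs return early above, so
 * presumably their doorbell routing does not need this window.
 */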
4381 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4382 {
4383 struct amdgpu_ring *ring;
4384 u32 tmp;
4385 u32 rb_bufsz;
4386 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4387 int r;
4389 /* Set the write pointer delay */
4390 WREG32(mmCP_RB_WPTR_DELAY, 0);
4392 /* set the RB to use vmid 0 */
4393 WREG32(mmCP_RB_VMID, 0);
4395 /* Set ring buffer size */
4396 ring = &adev->gfx.gfx_ring[0];
4397 rb_bufsz = order_base_2(ring->ring_size / 8);
4398 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4399 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4400 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4401 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4402 #ifdef __BIG_ENDIAN
4403 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4404 #endif
4405 WREG32(mmCP_RB0_CNTL, tmp);
4407 /* Initialize the ring buffer's read and write pointers */
4408 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4409 ring->wptr = 0;
4410 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4412 /* set the wb address whether it's enabled or not */
4413 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4414 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4415 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4417 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4418 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4419 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4420 mdelay(1);
4421 WREG32(mmCP_RB0_CNTL, tmp);
4423 rb_addr = ring->gpu_addr >> 8;
4424 WREG32(mmCP_RB0_BASE, rb_addr);
4425 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4427 gfx_v8_0_set_cpg_door_bell(adev, ring);
4428 /* start the ring */
4429 amdgpu_ring_clear_ring(ring);
4430 gfx_v8_0_cp_gfx_start(adev);
4431 ring->ready = true;
4432 r = amdgpu_ring_test_ring(ring);
4433 if (r)
4434 ring->ready = false;
4436 return r;
4437 }
4439 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4440 {
4441 int i;
4443 if (enable) {
4444 WREG32(mmCP_MEC_CNTL, 0);
4445 } else {
4446 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4447 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4448 adev->gfx.compute_ring[i].ready = false;
4449 adev->gfx.kiq.ring.ready = false;
4454 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4455 {
4456 const struct gfx_firmware_header_v1_0 *mec_hdr;
4457 const __le32 *fw_data;
4458 unsigned i, fw_size;
4460 if (!adev->gfx.mec_fw)
4463 gfx_v8_0_cp_compute_enable(adev, false);
4465 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4466 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4468 fw_data = (const __le32 *)
4469 (adev->gfx.mec_fw->data +
4470 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4471 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4474 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4475 for (i = 0; i < fw_size; i++)
4476 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4477 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4479 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4480 if (adev->gfx.mec2_fw) {
4481 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4483 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4484 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4486 fw_data = (const __le32 *)
4487 (adev->gfx.mec2_fw->data +
4488 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4489 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4491 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4492 for (i = 0; i < fw_size; i++)
4493 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4494 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4495 }
4497 return 0;
4498 }
4501 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4502 {
4503 uint32_t tmp;
4504 struct amdgpu_device *adev = ring->adev;
4506 /* tell RLC which is KIQ queue */
4507 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4508 tmp &= 0xffffff00;
4509 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4510 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4511 tmp |= 0x80;
4512 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4513 }
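/*
 * The low byte of RLC_CP_SCHEDULERS encodes the KIQ location as
 * (me << 5) | (pipe << 3) | queue. The second write with bit 0x80 set
 * appears to activate the selection; that reading is inferred from the
 * write sequence rather than documented here.
 */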
4515 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4516 {
4517 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4518 uint32_t scratch, tmp = 0;
4519 uint64_t queue_mask = 0;
4520 int r, i;
4522 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4523 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4526 /* This situation may be hit in the future if a new HW
4527 * generation exposes more than 64 queues. If so, the
4528 * definition of queue_mask needs updating */
4529 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4530 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4534 queue_mask |= (1ull << i);
4537 r = amdgpu_gfx_scratch_get(adev, &scratch);
4539 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4542 WREG32(scratch, 0xCAFEDEAD);
4544 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4545 if (r) {
4546 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4547 amdgpu_gfx_scratch_free(adev, scratch);
4548 return r;
4549 }
4551 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4552 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4553 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4554 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4555 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4556 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4557 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4558 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4559 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4560 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4561 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4562 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4565 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4566 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
4567 amdgpu_ring_write(kiq_ring,
4568 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4569 amdgpu_ring_write(kiq_ring,
4570 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4571 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4572 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4573 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4574 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4575 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4576 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4577 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4578 }
4579 /* write to scratch for completion */
4580 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4581 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4582 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4583 amdgpu_ring_commit(kiq_ring);
4585 for (i = 0; i < adev->usec_timeout; i++) {
4586 tmp = RREG32(scratch);
4587 if (tmp == 0xDEADBEEF)
4588 break;
4589 DRM_UDELAY(1);
4590 }
4591 if (i >= adev->usec_timeout) {
4592 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4593 scratch, tmp);
4594 r = -EINVAL;
4595 }
4596 amdgpu_gfx_scratch_free(adev, scratch);
4598 return r;
4599 }
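/*
 * SET_RESOURCES and the MAP_QUEUES packets above are how the KIQ hands
 * each compute ring's MQD address and wptr-poll address to CP firmware;
 * the trailing scratch write then acts as a fence confirming the KIQ
 * consumed the entire batch.
 */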
4601 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4602 {
4603 int i, r = 0;
4605 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4606 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4607 for (i = 0; i < adev->usec_timeout; i++) {
4608 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4609 break;
4610 udelay(1);
4611 }
4612 if (i == adev->usec_timeout)
4613 r = -ETIMEDOUT;
4614 }
4615 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4616 WREG32(mmCP_HQD_PQ_RPTR, 0);
4617 WREG32(mmCP_HQD_PQ_WPTR, 0);
4619 return r;
4620 }
4622 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4623 {
4624 struct amdgpu_device *adev = ring->adev;
4625 struct vi_mqd *mqd = ring->mqd_ptr;
4626 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4627 uint32_t tmp;
4629 mqd->header = 0xC0310800;
4630 mqd->compute_pipelinestat_enable = 0x00000001;
4631 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4632 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4633 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4634 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4635 mqd->compute_misc_reserved = 0x00000003;
4636 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4637 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4638 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4639 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4640 eop_base_addr = ring->eop_gpu_addr >> 8;
4641 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4642 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4644 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4645 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4646 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4647 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4649 mqd->cp_hqd_eop_control = tmp;
4651 /* enable doorbell? */
4652 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4653 CP_HQD_PQ_DOORBELL_CONTROL,
4654 DOORBELL_EN,
4655 ring->use_doorbell ? 1 : 0);
4657 mqd->cp_hqd_pq_doorbell_control = tmp;
4659 /* set the pointer to the MQD */
4660 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4661 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4663 /* set MQD vmid to 0 */
4664 tmp = RREG32(mmCP_MQD_CONTROL);
4665 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4666 mqd->cp_mqd_control = tmp;
4668 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4669 hqd_gpu_addr = ring->gpu_addr >> 8;
4670 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4671 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4673 /* set up the HQD, this is similar to CP_RB0_CNTL */
4674 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4675 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4676 (order_base_2(ring->ring_size / 4) - 1));
4677 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4678 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4679 #ifdef __BIG_ENDIAN
4680 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4681 #endif
4682 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4683 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4684 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4685 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4686 mqd->cp_hqd_pq_control = tmp;
4688 /* set the wb address whether it's enabled or not */
4689 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4690 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4691 mqd->cp_hqd_pq_rptr_report_addr_hi =
4692 upper_32_bits(wb_gpu_addr) & 0xffff;
4694 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4695 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4696 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4697 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
4719 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4721 /* set the vmid for the queue */
4722 mqd->cp_hqd_vmid = 0;
4724 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4725 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4726 mqd->cp_hqd_persistent_state = tmp;
4729 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4730 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4731 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4732 mqd->cp_hqd_ib_control = tmp;
4734 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4735 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4736 mqd->cp_hqd_iq_timer = tmp;
4738 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4739 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4740 mqd->cp_hqd_ctx_save_control = tmp;
4743 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4744 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4745 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4746 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4747 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4748 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4749 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4750 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4751 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4752 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4753 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4754 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4755 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4756 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4757 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}

int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;
4774 /* disable wptr polling */
4775 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4777 /* program all HQD registers */
4778 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4779 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
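	/* this indexing works because struct vi_mqd mirrors the CP_MQD/CP_HQD
	 * register file word for word, so (mqd_reg - mmCP_MQD_BASE_ADDR) is
	 * also the dword offset of that register's value inside the MQD */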
	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}
4792 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4793 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4795 /* activate the HQD */
4796 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}

static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
4806 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4808 gfx_v8_0_kiq_setting(ring);
4810 if (adev->in_sriov_reset) { /* for GPU_RESET case */
4811 /* reset MQD to a clean status */
4812 if (adev->gfx.mec.mqd_backup[mqd_idx])
4813 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
4818 mutex_lock(&adev->srbm_mutex);
4819 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4820 gfx_v8_0_mqd_commit(adev, mqd);
4821 vi_srbm_select(adev, 0, 0, 0, 0);
4822 mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4825 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4826 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4827 mutex_lock(&adev->srbm_mutex);
4828 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4829 gfx_v8_0_mqd_init(ring);
4830 gfx_v8_0_mqd_commit(adev, mqd);
4831 vi_srbm_select(adev, 0, 0, 0, 0);
4832 mutex_unlock(&adev->srbm_mutex);
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}

static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
4843 struct amdgpu_device *adev = ring->adev;
4844 struct vi_mqd *mqd = ring->mqd_ptr;
4845 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4847 if (!adev->in_sriov_reset && !adev->gfx.in_suspend) {
4848 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4849 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4850 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4851 mutex_lock(&adev->srbm_mutex);
4852 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4853 gfx_v8_0_mqd_init(ring);
4854 vi_srbm_select(adev, 0, 0, 0, 0);
4855 mutex_unlock(&adev->srbm_mutex);
4857 if (adev->gfx.mec.mqd_backup[mqd_idx])
4858 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4859 } else if (adev->in_sriov_reset) { /* for GPU_RESET case */
4860 /* reset MQD to a clean status */
4861 if (adev->gfx.mec.mqd_backup[mqd_idx])
4862 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}

static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy firmware loading */
		r = gfx_v8_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		r = gfx_v8_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}

static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}
static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t scratch, tmp = 0;
	int r, i;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(kiq_ring, 10);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
5025 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5026 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5027 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5028 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5029 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5030 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5031 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5032 amdgpu_ring_write(kiq_ring, 0);
5033 amdgpu_ring_write(kiq_ring, 0);
5034 amdgpu_ring_write(kiq_ring, 0);
5035 /* write to scratch for completion */
5036 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5037 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5038 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5039 amdgpu_ring_commit(kiq_ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	/* disable KCQs so the CPC stops touching memory that may go away */
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.in_suspend = true;
	return gfx_v8_0_hw_fini(adev);
}

static int gfx_v8_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gfx_v8_0_hw_init(adev);
	adev->gfx.in_suspend = false;
	return r;
}
static bool gfx_v8_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(handle))
			return 0;

		udelay(1);
	}
	return -ETIMEDOUT;
}
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
5128 tmp = RREG32(mmGRBM_STATUS);
5129 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5130 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5131 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5132 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5133 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5134 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5135 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5136 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5137 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5138 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5139 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}
5145 tmp = RREG32(mmGRBM_STATUS2);
5146 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5147 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5148 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5150 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5151 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5152 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}
5164 tmp = RREG32(mmSRBM_STATUS);
5165 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5166 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5167 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5168 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5169 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5170 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;
5192 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5193 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5196 gfx_v8_0_rlc_stop(adev);
5198 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5199 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5200 /* Disable GFX parsing/prefetching */
5201 gfx_v8_0_cp_gfx_enable(adev, false);
5203 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5204 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5205 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;
5209 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5210 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5212 mutex_lock(&adev->srbm_mutex);
5213 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5214 gfx_v8_0_deactivate_hqd(adev, 2);
5215 vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;
5235 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5236 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5238 if (grbm_soft_reset || srbm_soft_reset) {
5239 tmp = RREG32(mmGMCON_DEBUG);
5240 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5241 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
	}
5246 if (grbm_soft_reset) {
5247 tmp = RREG32(mmGRBM_SOFT_RESET);
5248 tmp |= grbm_soft_reset;
5249 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}
5260 if (srbm_soft_reset) {
5261 tmp = RREG32(mmSRBM_SOFT_RESET);
5262 tmp |= srbm_soft_reset;
5263 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}
5274 if (grbm_soft_reset || srbm_soft_reset) {
5275 tmp = RREG32(mmGMCON_DEBUG);
5276 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5277 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;
5296 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5297 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5299 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5300 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5301 gfx_v8_0_cp_gfx_resume(adev);
5303 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5304 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5305 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;
5309 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5310 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5312 mutex_lock(&adev->srbm_mutex);
5313 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5314 gfx_v8_0_deactivate_hqd(adev, 2);
5315 vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
5351 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5352 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5354 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5355 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5357 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5358 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5361 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5362 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5363 WRITE_DATA_DST_SEL(0)));
5364 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5365 amdgpu_ring_write(ring, 0);
5366 amdgpu_ring_write(ring, gds_base);
5369 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5370 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5371 WRITE_DATA_DST_SEL(0)));
5372 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5373 amdgpu_ring_write(ring, 0);
5374 amdgpu_ring_write(ring, gds_size);
5377 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5378 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5379 WRITE_DATA_DST_SEL(0)));
5380 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5381 amdgpu_ring_write(ring, 0);
5382 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5385 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5386 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5387 WRITE_DATA_DST_SEL(0)));
5388 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5389 amdgpu_ring_write(ring, 0);
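	/* e.g. oa_base = 4, oa_size = 2 yields (1 << 6) - (1 << 4) = 0x30,
	 * i.e. a contiguous mask covering OA slots 4 and 5 for this vmid */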
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}
5403 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5404 uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
5407 WREG32(mmSQ_IND_INDEX,
5408 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5409 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5410 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5411 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5412 (SQ_IND_INDEX__FORCE_READ_MASK) |
5413 (SQ_IND_INDEX__AUTO_INCR_MASK));
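	/* AUTO_INCR makes SQ_IND_INDEX advance after each SQ_IND_DATA read,
	 * so the register range can be streamed out with back-to-back reads */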
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
5420 /* type 0 wave data */
5421 dst[(*no_fields)++] = 0;
5422 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5423 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5424 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5425 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5426 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5427 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5428 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5429 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5430 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5431 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5432 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5433 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5434 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5435 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5436 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5437 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5438 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
5442 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5443 uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
5452 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5453 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5454 .select_se_sh = &gfx_v8_0_select_se_sh,
5455 .read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};

static int gfx_v8_0_early_init(void *handle)
{
5461 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5463 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5464 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5465 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5466 gfx_v8_0_set_ring_funcs(adev);
5467 gfx_v8_0_set_irq_funcs(adev);
5468 gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;
5487 /* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;
5492 amdgpu_set_powergating_state(adev,
		AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}

static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
5501 if ((adev->asic_type == CHIP_POLARIS11) ||
5502 (adev->asic_type == CHIP_POLARIS12))
5503 /* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
	else
		WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							 bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
5543 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5544 cz_enable_gfx_cg_power_gating(adev, true);
5545 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:
5566 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5567 cz_enable_sck_slow_down_on_power_up(adev, true);
5568 cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);
5578 cz_update_gfx_cg_power_gating(adev, enable);
5580 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
5590 case CHIP_POLARIS11:
5591 case CHIP_POLARIS12:
5592 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}

	return 0;
}
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;
5622 /* AMD_CG_SUPPORT_GFX_MGCG */
5623 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5624 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5625 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
	/* AMD_CG_SUPPORT_GFX_CGCG */
5628 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5629 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5630 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5632 /* AMD_CG_SUPPORT_GFX_CGLS */
5633 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5634 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5636 /* AMD_CG_SUPPORT_GFX_CGTS */
5637 data = RREG32(mmCGTS_SM_CTRL_REG);
5638 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5639 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5641 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5642 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5643 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5645 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5646 data = RREG32(mmRLC_MEM_SLP_CNTL);
5647 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5648 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5650 /* AMD_CG_SUPPORT_GFX_CP_LS */
5651 data = RREG32(mmCP_MEM_SLP_CNTL);
5652 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}

static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;
5661 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5663 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5664 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5666 data = RREG32(mmRLC_SERDES_WR_CTRL);
5667 if (adev->asic_type == CHIP_STONEY)
5668 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5669 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5670 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5671 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5672 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5673 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5674 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5675 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5679 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5680 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5681 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5682 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5683 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5684 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5685 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5686 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5687 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5688 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5689 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5690 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5691 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5692 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5697 #define MSG_ENTER_RLC_SAFE_MODE 1
5698 #define MSG_EXIT_RLC_SAFE_MODE 0
5699 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5700 #define RLC_GPR_REG2__REQ__SHIFT 0
5701 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5702 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
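/* per the masks above, bit 0 of RLC_GPR_REG2 is the REQ handshake bit and
 * bits 4:1 carry the message code (MSG_ENTER/MSG_EXIT_RLC_SAFE_MODE) */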
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;
5713 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5714 data |= RLC_SAFE_MODE__CMD_MASK;
5715 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5716 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5717 WREG32(mmRLC_SAFE_MODE, data);
5719 for (i = 0; i < adev->usec_timeout; i++) {
5720 if ((RREG32(mmRLC_GPM_STAT) &
5721 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5722 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5723 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;
5747 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5748 if (adev->gfx.rlc.in_safe_mode) {
5749 data |= RLC_SAFE_MODE__CMD_MASK;
5750 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5751 WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
5771 uint32_t temp, data;
5773 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5775 /* It is disabled by HW by default */
5776 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5777 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5778 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5779 /* 1 - RLC memory Light sleep */
5780 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5782 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}
5786 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5787 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5788 if (adev->flags & AMD_IS_APU)
5789 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5790 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5791 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5793 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5794 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5795 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5796 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5801 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5802 gfx_v8_0_wait_for_rlc_serdes(adev);
5804 /* 5 - clear mgcg override */
5805 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5807 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5808 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5809 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5810 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5811 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5812 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5813 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5814 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5815 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5816 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5817 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
5827 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5828 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5829 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5830 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5831 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5832 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5836 /* 2 - disable MGLS in RLC */
5837 data = RREG32(mmRLC_MEM_SLP_CNTL);
5838 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5839 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}
5843 /* 3 - disable MGLS in CP */
5844 data = RREG32(mmCP_MEM_SLP_CNTL);
5845 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5846 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}
5850 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5851 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5852 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5853 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);
5857 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5858 gfx_v8_0_wait_for_rlc_serdes(adev);
5860 /* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
	}
5865 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5866 gfx_v8_0_wait_for_rlc_serdes(adev);
	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
5875 uint32_t temp, temp1, data, data1;
5877 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5879 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5881 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5882 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5883 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5888 gfx_v8_0_wait_for_rlc_serdes(adev);
5890 /* 2 - clear cgcg override */
5891 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5893 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5894 gfx_v8_0_wait_for_rlc_serdes(adev);
5896 /* 3 - write cmd to set CGLS */
5897 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5899 /* 4 - enable cgcg */
5900 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5902 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5904 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5906 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5907 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5918 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5919 * Cmp_busy/GFX_Idle interrupts
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5927 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5928 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5929 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5933 /* read gfx register to wake up cgcg */
5934 RREG32(mmCB_CGTT_SCLK_CTRL);
5935 RREG32(mmCB_CGTT_SCLK_CTRL);
5936 RREG32(mmCB_CGTT_SCLK_CTRL);
5937 RREG32(mmCB_CGTT_SCLK_CTRL);
5939 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5940 gfx_v8_0_wait_for_rlc_serdes(adev);
		/* write cmd to Set CGCG Override */
5943 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5945 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5946 gfx_v8_0_wait_for_rlc_serdes(adev);
5948 /* write cmd to Clear CGLS */
5949 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5951 /* disable cgcg, cgls should be disabled too. */
5952 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5953 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}
5960 gfx_v8_0_wait_for_rlc_serdes(adev);
	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * === MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * === CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
5986 uint32_t msg_id, pp_state = 0;
5987 uint32_t pp_support_state = 0;
5989 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5990 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5991 pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
5994 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5995 pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}
6009 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6010 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6011 pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
6015 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6016 pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
6041 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6042 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6043 pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
6046 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6047 pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}
6061 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6062 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6063 pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
6066 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6067 pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}
6081 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6082 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6083 pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
6087 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6088 pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}
6103 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6104 pp_support_state = PP_STATE_SUPPORT_LS;
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}
6119 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6120 pp_support_state = PP_STATE_SUPPORT_LS;
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CP,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}
static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}
static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		/* XXX check if swapping is necessary on BE */
		return ring->adev->wb.wb[ring->wptr_offs];
	else
		return RREG32(mmCP_RB0_WPTR);
}
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}
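	/* WAIT_REG_MEM with OPERATION(1) gives write/wait/write semantics:
	 * the CP writes GPU_HDP_FLUSH_REQ, then polls GPU_HDP_FLUSH_DONE
	 * until (value & ref_and_mask) == ref_and_mask before continuing */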
6218 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6219 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
6222 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6223 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6224 amdgpu_ring_write(ring, ref_and_mask);
6225 amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}

static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);
}
6253 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6254 struct amdgpu_ib *ib,
6255 unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;
6259 if (ib->flags & AMDGPU_IB_FLAG_CE)
6260 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6262 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6264 control |= ib->length_dw | (vm_id << 24);
6266 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6267 control |= INDIRECT_BUFFER_PRE_ENB(1);
6269 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}
6273 amdgpu_ring_write(ring, header);
6274 amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6283 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6284 struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6289 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6290 amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6299 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
6302 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6303 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
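	/* DATA_SEL picks the fence payload (1 = 32-bit seq, 2 = 64-bit seq)
	 * and INT_SEL(2) asks the CP to raise an interrupt once the write
	 * has been confirmed */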
6305 /* EVENT_WRITE_EOP - flush caches, send int */
6306 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6307 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
6312 amdgpu_ring_write(ring, addr & 0xfffffffc);
6313 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6314 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6315 amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6323 uint32_t seq = ring->fence_drv.sync_seq;
6324 uint64_t addr = ring->fence_drv.gpu_addr;
6326 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6327 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6328 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6329 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6330 amdgpu_ring_write(ring, addr & 0xfffffffc);
6331 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6332 amdgpu_ring_write(ring, seq);
6333 amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
6340 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6342 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6343 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
6353 amdgpu_ring_write(ring, 0);
6354 amdgpu_ring_write(ring, pd_addr >> 12);
6356 /* bits 0-15 are the VM contexts0-15 */
6357 /* invalidate the cache */
6358 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6359 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6360 WRITE_DATA_DST_SEL(0)));
6361 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6362 amdgpu_ring_write(ring, 0);
6363 amdgpu_ring_write(ring, 1 << vm_id);
6365 /* wait for the invalidate to complete */
6366 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6367 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6368 WAIT_REG_MEM_FUNCTION(0) | /* always */
6369 WAIT_REG_MEM_ENGINE(0))); /* me */
6370 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6371 amdgpu_ring_write(ring, 0);
6372 amdgpu_ring_write(ring, 0); /* ref */
6373 amdgpu_ring_write(ring, 0); /* mask */
6374 amdgpu_ring_write(ring, 0x20); /* poll interval */
	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}
6389 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6391 struct amdgpu_device *adev = ring->adev;
6393 /* XXX check if swapping is necessary on BE */
6394 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}

static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
6402 int pipe_num, tmp, reg;
6403 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
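	/* VALUE_MASK grants the pipe its full wave-launch budget while a
	 * reservation is held; 0x1 throttles a released pipe to the minimum */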
6405 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}
6417 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;
6425 mutex_lock(&adev->gfx.pipe_reserve_mutex);
6426 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6432 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6433 /* Clear all reservations - everyone reacquires all resources */
6434 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
6442 /* Lower all pipes without a current reservation */
6443 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6444 iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
6449 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6454 iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       iring->queue);
6459 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}

static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6472 uint32_t queue_priority = acquire ? 0xf : 0x0;
6474 mutex_lock(&adev->srbm_mutex);
6475 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6477 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6478 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6480 vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
					       enum amd_sched_priority priority)
{
6486 struct amdgpu_device *adev = ring->adev;
6487 bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW;
	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return;
6492 gfx_v8_0_hqd_set_priority(adev, ring, acquire);
	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6501 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6503 /* RELEASE_MEM - flush caches, send int */
6504 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6505 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
6510 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6511 amdgpu_ring_write(ring, addr & 0xfffffffc);
6512 amdgpu_ring_write(ring, upper_32_bits(addr));
6513 amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
6520 /* we only allocate 32bit for each seq wb address */
6521 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6523 /* write fence seq to the "addr" */
6524 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6525 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6526 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6527 amdgpu_ring_write(ring, lower_32_bits(addr));
6528 amdgpu_ring_write(ring, upper_32_bits(addr));
6529 amdgpu_ring_write(ring, lower_32_bits(seq));
6531 if (flags & AMDGPU_FENCE_FLAG_INT) {
6532 /* set register to trigger INT */
6533 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6534 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6535 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6536 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6537 amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);
6555 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6556 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6557 gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}
6576 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6577 amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6586 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6587 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6588 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6589 ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */

	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);
6601 cur = (ring->wptr & ring->buf_mask) - 1;
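	/* e.g. a 256-dword ring (buf_mask == 255) with offset == 250 and
	 * wptr wrapped to 10 gives cur == 9, so the patched count becomes
	 * 256 - 250 + 9 == 15 dwords, spanning the wrap point */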
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6608 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;
6612 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6613 amdgpu_ring_write(ring, 0 | /* src: register*/
6614 (5 << 8) | /* dst: memory */
6615 (1 << 20)); /* write confirm */
6616 amdgpu_ring_write(ring, reg);
6617 amdgpu_ring_write(ring, 0);
6618 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6619 adev->virt.reg_val_offs * 4));
6620 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6621 adev->virt.reg_val_offs * 4));
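/*
 * COPY_DATA is how register reads are serviced when running as an SR-IOV
 * guest: src_sel 0 selects a mem-mapped register, dst_sel 5 memory, and
 * bit 20 asks for a write confirm. The value lands in the shared writeback
 * page at reg_val_offs, where the KIQ register-read path waits for it.
 */
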
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

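/*
 * Here WRITE_DATA targets a mem-mapped register (dst_sel field left at 0)
 * and (1 << 16) sets the one-address bit, so the destination is not
 * incremented between dwords; the register offset goes in the low address
 * dword and the high dword stays 0.
 */
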
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */
	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI. The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) &&
			    (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

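/*
 * ring_id layout decoded above: bits [1:0] = pipe, bits [3:2] = me,
 * bits [6:4] = queue. A ring_id of 0x25, for example, decodes to me 1,
 * pipe 1, queue 2 and therefore matches at most one compute ring in the
 * loop; me 0 is the gfx engine, which only has the single gfx ring.
 */
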
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only supports GENERIC2_INT now */
		break;
	}
	return 0;
}

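/*
 * GENERIC2 is the only interrupt the KIQ uses here: CPC_INT_CNTL gates it
 * for CPC as a whole, while the per-pipe CP_ME{1,2}_PIPE0_INT_CNTL register,
 * offset by ring->pipe via WREG32_FIELD_OFFSET, gates the specific pipe the
 * KIQ lives on.
 */
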
static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jumps to the place just
		     * prior to this double SWITCH_BUFFER
		     */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};

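/*
 * emit_frame_size is the worst-case dword budget the ring core reserves
 * ahead of calling the emit_* hooks for one submission; each term in the
 * sum above mirrors the footprint of one emit function, so any packet
 * growth there has to be reflected here to avoid overrunning the
 * reservation.
 */
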
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

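/*
 * Partition sizing: with the 64KB of GDS these parts report in
 * GDS_VMID0_SIZE, 4096-byte gfx/cs partitions yield 64 * 1024 / 4096 = 16
 * slots; the 1024-byte fallback covers parts that report a smaller GDS.
 */
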
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

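/*
 * Both registers hold INACTIVE_CUS bitmaps (hard fuses in
 * CC_GC_SHADER_ARRAY_CONFIG, software-requested disables in the GC_USER
 * copy), so OR-ing them, inverting and masking down to max_cu_per_sh gives
 * the usable CUs of the currently selected SE/SH.
 */
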
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}

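/*
 * ao_cu_mask packs one byte per shader array: SE i / SH j lands at bit
 * (i * 16 + j * 8), so e.g. eight always-on CUs in each of SE0/SH0 and
 * SE0/SH1 produce 0x0000ffff. Only the first two SEs and SHs fit this
 * 32-bit layout, hence the (i < 2 && j < 2) guard above.
 */
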
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}

static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr +
			offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr +
			offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}