/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
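
/*
 * The golden register tables below are {register, AND mask, OR value}
 * triplets consumed by amdgpu_program_register_sequence(): bits covered by
 * the AND mask are cleared and then OR'd with the new value, so an AND mask
 * of 0xffffffff simply overwrites the whole register.
 */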
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);
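
/*
 * Apply the per-ASIC "golden" register settings defined in the tables above:
 * clock-gating defaults, GB_ADDR_CONFIG and the common raster/SPI resource
 * setup.
 */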
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
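
/*
 * Basic ring sanity check: seed a scratch register with 0xCAFEDEAD, ask the
 * CP to overwrite it with 0xDEADBEEF via a SET_UCONFIG_REG packet, and poll
 * until the new value appears or adev->usec_timeout expires.
 */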
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
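
/*
 * Same check as the ring test, but the scratch write is submitted through an
 * indirect buffer (IB) and completion is detected by waiting on the IB's
 * fence instead of polling.
 */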
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
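
/*
 * Fetch and validate the PFP/ME/CE/RLC/MEC (and, where present, MEC2)
 * firmware images for the detected ASIC; with SMU-loaded firmware, the
 * images are also registered in adev->firmware.ucode[] for later upload.
 */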
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);

	/* The chained IB ucode hasn't been formally released yet, so keep it
	 * disabled for now.
	 * TODO: once the ucode is ready, use the ucode version to decide
	 * whether chained IBs are supported.
	 */
	adev->virt.chained_ib_support = false;

	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the JT (jump table) area */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
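
/*
 * Copy the jump tables out of the CP firmware images into the RLC cp_table
 * buffer; me indices 0-4 select CE, PFP, ME, MEC and (Carrizo only) MEC2.
 */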
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}
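
/*
 * Allocate and fill the RLC clear-state buffer (CSB) in VRAM; on
 * Carrizo/Stoney additionally allocate the CP jump-table/GDS backup buffer
 * and populate it via cz_init_cp_jump_table().
 */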
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}
static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  struct amdgpu_irq_src *irq)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	int r = 0;

	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
	if (r)
		return r;

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
	if (adev->gfx.mec2_fw) {
		ring->me = 2;
		ring->pipe = 0;
	} else {
		ring->me = 1;
		ring->pipe = 1;
	}

	ring->queue = 0;
	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024,
			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}
static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq)
{
	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
	amdgpu_ring_fini(ring);
}
#define MEC_HPD_SIZE 2048
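
/* one MEC_HPD_SIZE-sized HPD EOP (end of pipe) buffer is allocated per
 * compute queue below */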
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, MEC_HPD_SIZE);

	r = amdgpu_bo_reserve(kiq->eop_obj, false);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}
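
/*
 * Raw GCN3 compute shader machine code used by the Carrizo EDC workaround
 * below: the first shader writes every VGPR and the second every SGPR with
 * a known value before finishing with s_barrier/s_endpgm.
 */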
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
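
/*
 * Register/value pairs written via SET_SH_REG before each EDC init
 * dispatch; the COMPUTE_STATIC_THREAD_MGMT_SE0 masks run the VGPR pass on
 * all CUs and split the two SGPR passes across the lower and upper CUs.
 */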
1564 static const u32 vgpr_init_regs[] =
1566 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1567 mmCOMPUTE_RESOURCE_LIMITS, 0,
1568 mmCOMPUTE_NUM_THREAD_X, 256*4,
1569 mmCOMPUTE_NUM_THREAD_Y, 1,
1570 mmCOMPUTE_NUM_THREAD_Z, 1,
1571 mmCOMPUTE_PGM_RSRC2, 20,
1572 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1573 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1574 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1575 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1576 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1577 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1578 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1579 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1580 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1581 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1584 static const u32 sgpr1_init_regs[] =
1586 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1587 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1588 mmCOMPUTE_NUM_THREAD_X, 256*5,
1589 mmCOMPUTE_NUM_THREAD_Y, 1,
1590 mmCOMPUTE_NUM_THREAD_Z, 1,
1591 mmCOMPUTE_PGM_RSRC2, 20,
1592 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1593 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1594 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1595 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1596 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1597 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1598 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1599 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1600 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1601 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1604 static const u32 sgpr2_init_regs[] =
1606 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1607 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1608 mmCOMPUTE_NUM_THREAD_X, 256*5,
1609 mmCOMPUTE_NUM_THREAD_Y, 1,
1610 mmCOMPUTE_NUM_THREAD_Z, 1,
1611 mmCOMPUTE_PGM_RSRC2, 20,
1612 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1613 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1614 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1615 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1616 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1617 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1618 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1619 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1620 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1621 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
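/*
 * Identical to sgpr1_init_regs except for the SE0 static thread
 * management mask (0xf0 here versus 0x0f above); the two SGPR passes
 * presumably steer the dispatch to disjoint groups of CUs so that
 * together they cover all of them.
 */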
1624 static const u32 sec_ded_counter_registers[] =
1627 mmCPC_EDC_SCRATCH_CNT,
1628 mmCPC_EDC_UCODE_CNT,
1635 mmDC_EDC_CSINVOC_CNT,
1636 mmDC_EDC_RESTORE_CNT,
1642 mmSQC_ATC_EDC_GATCL1_CNT,
1648 mmTCP_ATC_EDC_GATCL1_CNT,
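/*
 * SEC/DED (single-error-correct, double-error-detect) counter
 * registers; the workaround below reads each one back at the end,
 * which clears the counters.
 */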
1653 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1655 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1656 struct amdgpu_ib ib;
1657 struct dma_fence *f = NULL;
1660 unsigned total_size, vgpr_offset, sgpr_offset;
1663 /* only supported on CZ */
1664 if (adev->asic_type != CHIP_CARRIZO)
return 0;
1667 /* bail if the compute ring is not ready */
if (!ring->ready)
return 0;
1671 tmp = RREG32(mmGB_EDC_MODE);
1672 WREG32(mmGB_EDC_MODE, 0);
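/*
 * Save the current EDC mode and disable it while the GPR-init shaders
 * run; it is reprogrammed once the three dispatches below complete.
 */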
total_size =
1675 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
total_size +=
1677 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
total_size +=
1679 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1680 total_size = ALIGN(total_size, 256);
1681 vgpr_offset = total_size;
1682 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1683 sgpr_offset = total_size;
1684 total_size += sizeof(sgpr_init_compute_shader);
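/*
 * Command-stream sizing, derived from the packets built below: each
 * (reg, value) pair costs 3 dwords (PACKET3 header, register offset,
 * value), the COMPUTE_PGM_LO/HI write costs 4, DISPATCH_DIRECT 5 and
 * EVENT_WRITE 2, all multiplied by 4 bytes per dword. The VGPR shader
 * offset is 256-byte aligned because COMPUTE_PGM_LO/HI hold a
 * 256-byte-aligned address (hence the >> 8 when it is written).
 */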
1686 /* allocate an indirect buffer to put the commands in */
1687 memset(&ib, 0, sizeof(ib));
1688 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1690 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1694 /* load the compute shaders */
1695 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1696 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1698 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1699 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1701 /* init the ib length to 0 */
ib.length_dw = 0;
1705 /* write the register state for the compute dispatch */
1706 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1707 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1708 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1709 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1711 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1712 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1713 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1714 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1715 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1716 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1718 /* write dispatch packet */
1719 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1720 ib.ptr[ib.length_dw++] = 8; /* x */
1721 ib.ptr[ib.length_dw++] = 1; /* y */
1722 ib.ptr[ib.length_dw++] = 1; /* z */
1723 ib.ptr[ib.length_dw++] =
1724 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1726 /* write CS partial flush packet */
1727 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1728 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
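/*
 * The same four steps (program the SH registers, point
 * COMPUTE_PGM_LO/HI at the shader, dispatch 8x1x1 thread groups, fence
 * with a CS partial flush) repeat below for the two SGPR passes.
 */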
1731 /* write the register state for the compute dispatch */
1732 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1733 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1734 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1735 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1737 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1738 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1739 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1740 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1741 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1742 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1744 /* write dispatch packet */
1745 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1746 ib.ptr[ib.length_dw++] = 8; /* x */
1747 ib.ptr[ib.length_dw++] = 1; /* y */
1748 ib.ptr[ib.length_dw++] = 1; /* z */
1749 ib.ptr[ib.length_dw++] =
1750 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1752 /* write CS partial flush packet */
1753 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1754 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1757 /* write the register state for the compute dispatch */
1758 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1759 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1760 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1761 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1763 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1764 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1765 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1766 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1767 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1768 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1770 /* write dispatch packet */
1771 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1772 ib.ptr[ib.length_dw++] = 8; /* x */
1773 ib.ptr[ib.length_dw++] = 1; /* y */
1774 ib.ptr[ib.length_dw++] = 1; /* z */
1775 ib.ptr[ib.length_dw++] =
1776 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1778 /* write CS partial flush packet */
1779 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1780 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1782 /* schedule the ib on the ring */
1783 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1785 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1789 /* wait for the GPU to finish processing the IB */
1790 r = dma_fence_wait(f, false);
1792 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1796 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1797 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1798 WREG32(mmGB_EDC_MODE, tmp);
1800 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1801 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1802 WREG32(mmCC_GC_EDC_CONFIG, tmp);
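/*
 * Presumably this re-arms error reporting now that every GPR holds a
 * known value: DED_MODE/PROP_FED configure double-error detection and
 * fault propagation, and the CC_GC_EDC_CONFIG write ensures EDC is not
 * left disabled.
 */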
1805 /* read back registers to clear the counters */
1806 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1807 RREG32(sec_ded_counter_registers[i]);
1810 amdgpu_ib_free(adev, &ib, NULL);
1816 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1819 u32 mc_shared_chmap, mc_arb_ramcfg;
1820 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1824 switch (adev->asic_type) {
case CHIP_TOPAZ:
1826 adev->gfx.config.max_shader_engines = 1;
1827 adev->gfx.config.max_tile_pipes = 2;
1828 adev->gfx.config.max_cu_per_sh = 6;
1829 adev->gfx.config.max_sh_per_se = 1;
1830 adev->gfx.config.max_backends_per_se = 2;
1831 adev->gfx.config.max_texture_channel_caches = 2;
1832 adev->gfx.config.max_gprs = 256;
1833 adev->gfx.config.max_gs_threads = 32;
1834 adev->gfx.config.max_hw_contexts = 8;
1836 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1837 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1838 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1839 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1840 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_FIJI:
1843 adev->gfx.config.max_shader_engines = 4;
1844 adev->gfx.config.max_tile_pipes = 16;
1845 adev->gfx.config.max_cu_per_sh = 16;
1846 adev->gfx.config.max_sh_per_se = 1;
1847 adev->gfx.config.max_backends_per_se = 4;
1848 adev->gfx.config.max_texture_channel_caches = 16;
1849 adev->gfx.config.max_gprs = 256;
1850 adev->gfx.config.max_gs_threads = 32;
1851 adev->gfx.config.max_hw_contexts = 8;
1853 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1854 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1855 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1856 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1857 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
break;
1859 case CHIP_POLARIS11:
1860 case CHIP_POLARIS12:
1861 ret = amdgpu_atombios_get_gfx_info(adev);
1864 adev->gfx.config.max_gprs = 256;
1865 adev->gfx.config.max_gs_threads = 32;
1866 adev->gfx.config.max_hw_contexts = 8;
1868 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1869 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1870 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1871 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1872 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
break;
1874 case CHIP_POLARIS10:
1875 ret = amdgpu_atombios_get_gfx_info(adev);
1878 adev->gfx.config.max_gprs = 256;
1879 adev->gfx.config.max_gs_threads = 32;
1880 adev->gfx.config.max_hw_contexts = 8;
1882 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1883 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1884 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1885 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1886 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_TONGA:
1889 adev->gfx.config.max_shader_engines = 4;
1890 adev->gfx.config.max_tile_pipes = 8;
1891 adev->gfx.config.max_cu_per_sh = 8;
1892 adev->gfx.config.max_sh_per_se = 1;
1893 adev->gfx.config.max_backends_per_se = 2;
1894 adev->gfx.config.max_texture_channel_caches = 8;
1895 adev->gfx.config.max_gprs = 256;
1896 adev->gfx.config.max_gs_threads = 32;
1897 adev->gfx.config.max_hw_contexts = 8;
1899 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1900 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1901 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1902 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1903 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_CARRIZO:
1906 adev->gfx.config.max_shader_engines = 1;
1907 adev->gfx.config.max_tile_pipes = 2;
1908 adev->gfx.config.max_sh_per_se = 1;
1909 adev->gfx.config.max_backends_per_se = 2;
1911 switch (adev->pdev->revision) {
1919 adev->gfx.config.max_cu_per_sh = 8;
1929 adev->gfx.config.max_cu_per_sh = 6;
1936 adev->gfx.config.max_cu_per_sh = 6;
1945 adev->gfx.config.max_cu_per_sh = 4;
1949 adev->gfx.config.max_texture_channel_caches = 2;
1950 adev->gfx.config.max_gprs = 256;
1951 adev->gfx.config.max_gs_threads = 32;
1952 adev->gfx.config.max_hw_contexts = 8;
1954 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1955 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1956 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1957 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1958 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_STONEY:
1961 adev->gfx.config.max_shader_engines = 1;
1962 adev->gfx.config.max_tile_pipes = 2;
1963 adev->gfx.config.max_sh_per_se = 1;
1964 adev->gfx.config.max_backends_per_se = 1;
1966 switch (adev->pdev->revision) {
1973 adev->gfx.config.max_cu_per_sh = 3;
1979 adev->gfx.config.max_cu_per_sh = 2;
1983 adev->gfx.config.max_texture_channel_caches = 2;
1984 adev->gfx.config.max_gprs = 256;
1985 adev->gfx.config.max_gs_threads = 16;
1986 adev->gfx.config.max_hw_contexts = 8;
1988 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1989 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1990 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1991 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1992 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
break;
default:
1995 adev->gfx.config.max_shader_engines = 2;
1996 adev->gfx.config.max_tile_pipes = 4;
1997 adev->gfx.config.max_cu_per_sh = 2;
1998 adev->gfx.config.max_sh_per_se = 1;
1999 adev->gfx.config.max_backends_per_se = 2;
2000 adev->gfx.config.max_texture_channel_caches = 4;
2001 adev->gfx.config.max_gprs = 256;
2002 adev->gfx.config.max_gs_threads = 32;
2003 adev->gfx.config.max_hw_contexts = 8;
2005 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2006 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2007 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2008 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2009 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
break;
}
2013 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2014 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2015 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2017 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2018 adev->gfx.config.mem_max_burst_length_bytes = 256;
2019 if (adev->flags & AMD_IS_APU) {
2020 /* Get memory bank mapping mode. */
2021 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2022 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2023 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2025 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2026 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2027 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2029 /* Validate settings in case only one DIMM is installed. */
2030 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2031 dimm00_addr_map = 0;
2032 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2033 dimm01_addr_map = 0;
2034 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2035 dimm10_addr_map = 0;
2036 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2037 dimm11_addr_map = 0;
2039 /* If the DIMM address map is 8GB, the row size should be 2KB; otherwise 1KB. */
2040 /* If row size(DIMM1) != row size(DIMM0), use the larger of the two. */
2041 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2042 adev->gfx.config.mem_row_size_in_kb = 2;
2044 adev->gfx.config.mem_row_size_in_kb = 1;
2046 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2047 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2048 if (adev->gfx.config.mem_row_size_in_kb > 4)
2049 adev->gfx.config.mem_row_size_in_kb = 4;
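/*
 * Row size in KB = 4 bytes per column * 2^(8 + NOOFCOLS) / 1024, so
 * NOOFCOLS = 0 gives 1KB, 1 gives 2KB, 2 gives 4KB; anything larger is
 * clamped to 4KB.
 */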
2052 adev->gfx.config.shader_engine_tile_size = 32;
2053 adev->gfx.config.num_gpus = 1;
2054 adev->gfx.config.multi_gpu_tile_size = 64;
2056 /* fix up row size */
2057 switch (adev->gfx.config.mem_row_size_in_kb) {
case 1:
default:
2060 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
break;
case 2:
2063 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
break;
case 4:
2066 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
break;
}
2069 adev->gfx.config.gb_addr_config = gb_addr_config;
2074 static int gfx_v8_0_sw_init(void *handle)
2077 struct amdgpu_ring *ring;
2078 struct amdgpu_kiq *kiq;
2079 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2082 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
2087 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
2091 /* Privileged reg */
2092 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2093 &adev->gfx.priv_reg_irq);
2097 /* Privileged inst */
2098 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2099 &adev->gfx.priv_inst_irq);
2103 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2105 gfx_v8_0_scratch_init(adev);
2107 r = gfx_v8_0_init_microcode(adev);
2109 DRM_ERROR("Failed to load gfx firmware!\n");
2113 r = gfx_v8_0_rlc_init(adev);
2115 DRM_ERROR("Failed to init rlc BOs!\n");
2119 r = gfx_v8_0_mec_init(adev);
2121 DRM_ERROR("Failed to init MEC BOs!\n");
2125 /* set up the gfx ring */
2126 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2127 ring = &adev->gfx.gfx_ring[i];
2128 ring->ring_obj = NULL;
2129 sprintf(ring->name, "gfx");
2130 /* no gfx doorbells on iceland */
2131 if (adev->asic_type != CHIP_TOPAZ) {
2132 ring->use_doorbell = true;
2133 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2136 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2137 AMDGPU_CP_IRQ_GFX_EOP);
2142 /* set up the compute queues */
2143 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2146 /* max 32 queues per MEC */
2147 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2148 DRM_ERROR("Too many (%d) compute rings!\n", i);
break;
}
2151 ring = &adev->gfx.compute_ring[i];
2152 ring->ring_obj = NULL;
2153 ring->use_doorbell = true;
2154 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2155 ring->me = 1; /* first MEC */
ring->pipe = i / 8;
2157 ring->queue = i % 8;
2158 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
2159 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2160 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2161 /* type-2 packets are deprecated on MEC, use type-3 instead */
2162 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
irq_type);
2168 if (amdgpu_sriov_vf(adev)) {
2169 r = gfx_v8_0_kiq_init(adev);
2171 DRM_ERROR("Failed to init KIQ BOs!\n");
2175 kiq = &adev->gfx.kiq;
2176 r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2180 /* create MQD for all compute queues as well as KIQ for the SRIOV case */
2181 r = gfx_v8_0_compute_mqd_sw_init(adev);
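/*
 * Under SR-IOV the KIQ (kernel interface queue) is what programs the
 * compute queues, so its BOs and ring, plus the per-queue MQDs (memory
 * queue descriptors), are only created for virtual functions here.
 */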
2186 /* reserve GDS, GWS and OA resource for gfx */
2187 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2188 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2189 &adev->gds.gds_gfx_bo, NULL, NULL);
2193 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2194 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2195 &adev->gds.gws_gfx_bo, NULL, NULL);
2199 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2200 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2201 &adev->gds.oa_gfx_bo, NULL, NULL);
2205 adev->gfx.ce_ram_size = 0x8000;
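/* i.e. 32KB (0x8000 bytes) of constant engine (CE) RAM */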
2207 r = gfx_v8_0_gpu_early_init(adev);
2214 static int gfx_v8_0_sw_fini(void *handle)
2217 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2219 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2220 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2221 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2223 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2224 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2225 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2226 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2228 if (amdgpu_sriov_vf(adev)) {
2229 gfx_v8_0_compute_mqd_sw_fini(adev);
2230 gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2231 gfx_v8_0_kiq_fini(adev);
2234 gfx_v8_0_mec_fini(adev);
2235 gfx_v8_0_rlc_fini(adev);
2236 gfx_v8_0_free_microcode(adev);
2241 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2243 uint32_t *modearray, *mod2array;
2244 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2245 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2248 modearray = adev->gfx.config.tile_mode_array;
2249 mod2array = adev->gfx.config.macrotile_mode_array;
2251 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2252 modearray[reg_offset] = 0;
2254 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2255 mod2array[reg_offset] = 0;
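/*
 * modearray[i] is written to GB_TILE_MODE0 + i and mod2array[i] to
 * GB_MACROTILE_MODE0 + i by the per-ASIC cases below; macrotile index 7
 * (and, on some ASICs, a few tile-mode indices) is never programmed and
 * is skipped by the write loops.
 */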
2257 switch (adev->asic_type) {
case CHIP_TOPAZ:
2259 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2260 PIPE_CONFIG(ADDR_SURF_P2) |
2261 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2262 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2263 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2264 PIPE_CONFIG(ADDR_SURF_P2) |
2265 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2266 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2267 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2268 PIPE_CONFIG(ADDR_SURF_P2) |
2269 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2270 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2271 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2272 PIPE_CONFIG(ADDR_SURF_P2) |
2273 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2274 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2275 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2276 PIPE_CONFIG(ADDR_SURF_P2) |
2277 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2278 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2279 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2280 PIPE_CONFIG(ADDR_SURF_P2) |
2281 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2282 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2283 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2284 PIPE_CONFIG(ADDR_SURF_P2) |
2285 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2286 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2287 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2288 PIPE_CONFIG(ADDR_SURF_P2));
2289 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2290 PIPE_CONFIG(ADDR_SURF_P2) |
2291 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2292 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2293 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294 PIPE_CONFIG(ADDR_SURF_P2) |
2295 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2296 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2297 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2298 PIPE_CONFIG(ADDR_SURF_P2) |
2299 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2301 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2302 PIPE_CONFIG(ADDR_SURF_P2) |
2303 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2305 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306 PIPE_CONFIG(ADDR_SURF_P2) |
2307 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2309 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2310 PIPE_CONFIG(ADDR_SURF_P2) |
2311 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2313 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2314 PIPE_CONFIG(ADDR_SURF_P2) |
2315 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2317 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2318 PIPE_CONFIG(ADDR_SURF_P2) |
2319 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2321 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2322 PIPE_CONFIG(ADDR_SURF_P2) |
2323 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2325 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2326 PIPE_CONFIG(ADDR_SURF_P2) |
2327 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2329 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2330 PIPE_CONFIG(ADDR_SURF_P2) |
2331 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2333 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2334 PIPE_CONFIG(ADDR_SURF_P2) |
2335 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2336 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2337 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2338 PIPE_CONFIG(ADDR_SURF_P2) |
2339 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2341 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2342 PIPE_CONFIG(ADDR_SURF_P2) |
2343 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2345 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2346 PIPE_CONFIG(ADDR_SURF_P2) |
2347 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2349 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2350 PIPE_CONFIG(ADDR_SURF_P2) |
2351 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2353 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2354 PIPE_CONFIG(ADDR_SURF_P2) |
2355 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2357 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2358 PIPE_CONFIG(ADDR_SURF_P2) |
2359 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2362 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2363 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2364 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2365 NUM_BANKS(ADDR_SURF_8_BANK));
2366 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2367 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2368 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2369 NUM_BANKS(ADDR_SURF_8_BANK));
2370 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2371 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2372 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2373 NUM_BANKS(ADDR_SURF_8_BANK));
2374 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2376 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2377 NUM_BANKS(ADDR_SURF_8_BANK));
2378 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2380 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381 NUM_BANKS(ADDR_SURF_8_BANK));
2382 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2383 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2384 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2385 NUM_BANKS(ADDR_SURF_8_BANK));
2386 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2388 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2389 NUM_BANKS(ADDR_SURF_8_BANK));
2390 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2391 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2392 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2393 NUM_BANKS(ADDR_SURF_16_BANK));
2394 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2395 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2396 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2397 NUM_BANKS(ADDR_SURF_16_BANK));
2398 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2401 NUM_BANKS(ADDR_SURF_16_BANK));
2402 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2403 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2404 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2405 NUM_BANKS(ADDR_SURF_16_BANK));
2406 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2407 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2408 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2409 NUM_BANKS(ADDR_SURF_16_BANK));
2410 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2413 NUM_BANKS(ADDR_SURF_16_BANK));
2414 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2417 NUM_BANKS(ADDR_SURF_8_BANK));
2419 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2420 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
reg_offset != 23)
2422 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2424 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2425 if (reg_offset != 7)
2426 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
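/*
 * The tile-mode indices skipped above were never assigned for this
 * ASIC; the remaining entries, including any still zero, are
 * programmed as-is.
 */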
break;
case CHIP_FIJI:
2430 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2433 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2434 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2435 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2437 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2438 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2440 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2441 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2442 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2444 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2446 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2449 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2450 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2451 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2453 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2454 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2455 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2457 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2458 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2460 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2461 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2462 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2463 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2464 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2465 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2470 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2473 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2474 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2476 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2477 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2478 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2480 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2481 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2482 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2484 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2488 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2489 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2493 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2494 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2496 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2497 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2498 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2500 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2501 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2502 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2503 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2504 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2505 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2506 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2508 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2509 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2510 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2511 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2512 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2513 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2514 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2515 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2516 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2517 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2518 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2519 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2520 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2521 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2522 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2523 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2524 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2525 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2526 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2527 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2528 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2529 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2530 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2531 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2532 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2533 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2534 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2536 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2537 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2538 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2542 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2544 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2545 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2546 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2548 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2549 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2550 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2553 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2555 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2556 NUM_BANKS(ADDR_SURF_8_BANK));
2557 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2559 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2560 NUM_BANKS(ADDR_SURF_8_BANK));
2561 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2563 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2564 NUM_BANKS(ADDR_SURF_8_BANK));
2565 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2567 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2568 NUM_BANKS(ADDR_SURF_8_BANK));
2569 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2571 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2572 NUM_BANKS(ADDR_SURF_8_BANK));
2573 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2575 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2576 NUM_BANKS(ADDR_SURF_8_BANK));
2577 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2579 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2580 NUM_BANKS(ADDR_SURF_8_BANK));
2581 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2583 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2584 NUM_BANKS(ADDR_SURF_8_BANK));
2585 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2587 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2588 NUM_BANKS(ADDR_SURF_8_BANK));
2589 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2591 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2592 NUM_BANKS(ADDR_SURF_8_BANK));
2593 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2595 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2596 NUM_BANKS(ADDR_SURF_8_BANK));
2597 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2599 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2600 NUM_BANKS(ADDR_SURF_8_BANK));
2601 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2603 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2604 NUM_BANKS(ADDR_SURF_8_BANK));
2605 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2607 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2608 NUM_BANKS(ADDR_SURF_4_BANK));
2610 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2611 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2613 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2614 if (reg_offset != 7)
2615 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
break;
case CHIP_TONGA:
2619 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2622 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2623 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2626 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2627 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2630 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2631 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2632 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2633 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2634 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2635 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2636 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2637 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2638 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2639 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2642 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2643 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2644 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2645 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2646 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2647 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2648 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2649 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2650 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2651 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2652 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2653 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2654 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2656 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2659 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2660 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2662 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2663 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2664 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2665 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2666 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2669 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2670 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2672 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2673 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2675 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2676 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2677 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2678 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2680 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2681 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2682 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2683 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2684 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2685 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2686 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2688 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2689 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2690 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2692 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2693 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2694 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2695 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2696 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2697 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2698 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2699 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2700 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2701 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2702 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2703 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2704 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2705 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2706 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2707 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2708 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2709 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2710 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2713 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2714 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2715 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2716 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2717 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2718 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2719 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2720 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2721 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2722 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2723 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2725 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2726 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2727 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2728 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2729 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2730 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2731 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2732 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2733 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2734 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2735 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2736 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2737 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2738 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2742 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2744 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2745 NUM_BANKS(ADDR_SURF_16_BANK));
2746 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2748 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2749 NUM_BANKS(ADDR_SURF_16_BANK));
2750 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2752 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753 NUM_BANKS(ADDR_SURF_16_BANK));
2754 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2755 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2756 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2757 NUM_BANKS(ADDR_SURF_16_BANK));
2758 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2760 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2761 NUM_BANKS(ADDR_SURF_16_BANK));
2762 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2765 NUM_BANKS(ADDR_SURF_16_BANK));
2766 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2768 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2769 NUM_BANKS(ADDR_SURF_16_BANK));
2770 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2772 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2773 NUM_BANKS(ADDR_SURF_16_BANK));
2774 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2776 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2777 NUM_BANKS(ADDR_SURF_16_BANK));
2778 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2779 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2780 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2781 NUM_BANKS(ADDR_SURF_16_BANK));
2782 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2783 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2784 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2785 NUM_BANKS(ADDR_SURF_16_BANK));
2786 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2789 NUM_BANKS(ADDR_SURF_8_BANK));
2790 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2792 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2793 NUM_BANKS(ADDR_SURF_4_BANK));
2794 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2796 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2797 NUM_BANKS(ADDR_SURF_4_BANK));
2799 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2800 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2802 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2803 if (reg_offset != 7)
2804 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
break;
2807 case CHIP_POLARIS11:
2808 case CHIP_POLARIS12:
2809 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2810 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2812 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2813 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2814 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2816 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2817 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2818 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2820 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2821 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2824 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2825 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2827 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2828 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2829 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2830 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2832 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2833 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2834 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2835 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2836 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2837 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2838 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2840 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2841 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2842 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2843 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2844 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2846 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2848 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2850 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2852 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2854 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2855 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2856 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2859 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2864 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2866 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2868 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2869 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2870 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2871 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2872 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2875 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2876 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2877 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2879 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2880 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2881 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2882 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2883 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2884 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2885 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2886 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2887 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2888 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2890 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2891 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2892 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2893 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2894 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2895 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2896 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2898 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2899 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2900 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2902 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2903 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2904 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2905 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2906 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2907 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2908 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2909 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2911 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2912 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2914 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2915 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2916 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2918 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2919 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2921 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2922 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2925 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2926 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2927 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2928 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2929 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2930 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2932 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2934 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2935 NUM_BANKS(ADDR_SURF_16_BANK));
2937 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2938 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2939 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2940 NUM_BANKS(ADDR_SURF_16_BANK));
2942 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2943 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2944 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2945 NUM_BANKS(ADDR_SURF_16_BANK));
2947 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2948 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2949 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2950 NUM_BANKS(ADDR_SURF_16_BANK));
2952 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2953 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2954 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2955 NUM_BANKS(ADDR_SURF_16_BANK));
2957 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2958 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2959 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2960 NUM_BANKS(ADDR_SURF_16_BANK));
2962 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2963 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2964 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2965 NUM_BANKS(ADDR_SURF_16_BANK));
2967 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2968 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2969 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970 NUM_BANKS(ADDR_SURF_16_BANK));
2972 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2973 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2974 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975 NUM_BANKS(ADDR_SURF_16_BANK));
2977 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2978 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2979 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2980 NUM_BANKS(ADDR_SURF_16_BANK));
2982 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2983 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2984 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2985 NUM_BANKS(ADDR_SURF_16_BANK));
2987 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2990 NUM_BANKS(ADDR_SURF_16_BANK));
2992 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2994 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2995 NUM_BANKS(ADDR_SURF_8_BANK));
2997 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2999 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3000 NUM_BANKS(ADDR_SURF_4_BANK));
3002 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3003 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3005 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3006 if (reg_offset != 7)
3007 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
break;
3010 case CHIP_POLARIS10:
3011 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3014 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3015 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3016 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3018 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3019 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3020 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3021 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3022 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3023 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3024 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3025 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3026 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3027 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3028 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3029 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3030 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3031 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3033 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3034 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3035 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3036 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3037 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3038 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3039 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3040 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3041 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3042 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3043 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3044 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3045 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3046 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3047 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3049 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3050 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3051 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3052 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3053 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3054 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3055 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3057 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3058 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3059 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3061 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3062 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3063 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3065 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3066 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3068 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3070 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3073 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3074 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3075 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3077 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3078 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3079 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3081 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3082 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3083 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3085 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3086 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3087 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3089 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3090 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3091 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3093 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3094 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3095 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3096 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3097 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3098 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3099 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3100 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3101 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3102 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3103 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3105 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3106 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3107 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3108 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3109 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3110 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3111 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3112 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3113 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3114 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3115 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3117 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3118 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3119 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3120 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3121 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3123 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3124 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3125 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3126 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3127 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3128 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3129 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3130 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3131 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3132 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3134 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3135 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3136 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3137 NUM_BANKS(ADDR_SURF_16_BANK));
3139 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3140 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3141 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3142 NUM_BANKS(ADDR_SURF_16_BANK));
3144 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3145 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3146 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3147 NUM_BANKS(ADDR_SURF_16_BANK));
3149 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3150 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3151 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3152 NUM_BANKS(ADDR_SURF_16_BANK));
3154 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3155 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3156 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3157 NUM_BANKS(ADDR_SURF_16_BANK));
3159 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3162 NUM_BANKS(ADDR_SURF_16_BANK));
3164 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3167 NUM_BANKS(ADDR_SURF_16_BANK));
3169 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3170 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3171 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3172 NUM_BANKS(ADDR_SURF_16_BANK));
3174 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3175 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3176 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3177 NUM_BANKS(ADDR_SURF_16_BANK));
3179 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3182 NUM_BANKS(ADDR_SURF_16_BANK));
3184 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187 NUM_BANKS(ADDR_SURF_16_BANK));
3189 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3190 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3191 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3192 NUM_BANKS(ADDR_SURF_8_BANK));
3194 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3197 NUM_BANKS(ADDR_SURF_4_BANK));
3199 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3200 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3201 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3202 NUM_BANKS(ADDR_SURF_4_BANK));
3204 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3205 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3207 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3208 if (reg_offset != 7)
3209 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3211 break;
3212 case CHIP_STONEY:
3213 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3214 PIPE_CONFIG(ADDR_SURF_P2) |
3215 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3216 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3217 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3218 PIPE_CONFIG(ADDR_SURF_P2) |
3219 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3220 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3221 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3222 PIPE_CONFIG(ADDR_SURF_P2) |
3223 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3224 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3225 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3226 PIPE_CONFIG(ADDR_SURF_P2) |
3227 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3228 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3229 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3230 PIPE_CONFIG(ADDR_SURF_P2) |
3231 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3232 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3233 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3234 PIPE_CONFIG(ADDR_SURF_P2) |
3235 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3236 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3237 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238 PIPE_CONFIG(ADDR_SURF_P2) |
3239 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3240 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3241 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3242 PIPE_CONFIG(ADDR_SURF_P2));
3243 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3244 PIPE_CONFIG(ADDR_SURF_P2) |
3245 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3248 PIPE_CONFIG(ADDR_SURF_P2) |
3249 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3251 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3252 PIPE_CONFIG(ADDR_SURF_P2) |
3253 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3255 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3256 PIPE_CONFIG(ADDR_SURF_P2) |
3257 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3259 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3260 PIPE_CONFIG(ADDR_SURF_P2) |
3261 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3263 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3264 PIPE_CONFIG(ADDR_SURF_P2) |
3265 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3267 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3268 PIPE_CONFIG(ADDR_SURF_P2) |
3269 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3271 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3272 PIPE_CONFIG(ADDR_SURF_P2) |
3273 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3275 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3276 PIPE_CONFIG(ADDR_SURF_P2) |
3277 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3279 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3280 PIPE_CONFIG(ADDR_SURF_P2) |
3281 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3283 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3284 PIPE_CONFIG(ADDR_SURF_P2) |
3285 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3287 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3288 PIPE_CONFIG(ADDR_SURF_P2) |
3289 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3291 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3292 PIPE_CONFIG(ADDR_SURF_P2) |
3293 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3295 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3296 PIPE_CONFIG(ADDR_SURF_P2) |
3297 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3299 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3300 PIPE_CONFIG(ADDR_SURF_P2) |
3301 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3303 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3304 PIPE_CONFIG(ADDR_SURF_P2) |
3305 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3307 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3308 PIPE_CONFIG(ADDR_SURF_P2) |
3309 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3311 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3312 PIPE_CONFIG(ADDR_SURF_P2) |
3313 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3316 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3318 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319 NUM_BANKS(ADDR_SURF_8_BANK));
3320 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3321 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3322 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3323 NUM_BANKS(ADDR_SURF_8_BANK));
3324 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327 NUM_BANKS(ADDR_SURF_8_BANK));
3328 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3331 NUM_BANKS(ADDR_SURF_8_BANK));
3332 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3333 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3334 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3335 NUM_BANKS(ADDR_SURF_8_BANK));
3336 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3337 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3338 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3339 NUM_BANKS(ADDR_SURF_8_BANK));
3340 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343 NUM_BANKS(ADDR_SURF_8_BANK));
3344 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3345 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3346 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3347 NUM_BANKS(ADDR_SURF_16_BANK));
3348 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3349 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3350 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3351 NUM_BANKS(ADDR_SURF_16_BANK));
3352 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3353 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355 NUM_BANKS(ADDR_SURF_16_BANK));
3356 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3357 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3358 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3359 NUM_BANKS(ADDR_SURF_16_BANK));
3360 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3361 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3362 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3363 NUM_BANKS(ADDR_SURF_16_BANK));
3364 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3365 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3366 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367 NUM_BANKS(ADDR_SURF_16_BANK));
3368 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3371 NUM_BANKS(ADDR_SURF_8_BANK));
3373 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3374 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3375 reg_offset != 23)
3376 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3378 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3379 if (reg_offset != 7)
3380 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3385 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3389 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3390 PIPE_CONFIG(ADDR_SURF_P2) |
3391 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3392 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3393 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3394 PIPE_CONFIG(ADDR_SURF_P2) |
3395 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3396 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3397 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3398 PIPE_CONFIG(ADDR_SURF_P2) |
3399 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3400 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3401 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3402 PIPE_CONFIG(ADDR_SURF_P2) |
3403 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3404 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3405 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3406 PIPE_CONFIG(ADDR_SURF_P2) |
3407 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3408 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3409 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3410 PIPE_CONFIG(ADDR_SURF_P2) |
3411 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3412 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3413 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3414 PIPE_CONFIG(ADDR_SURF_P2) |
3415 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3416 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3417 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3418 PIPE_CONFIG(ADDR_SURF_P2));
3419 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3420 PIPE_CONFIG(ADDR_SURF_P2) |
3421 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3423 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3424 PIPE_CONFIG(ADDR_SURF_P2) |
3425 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3427 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3428 PIPE_CONFIG(ADDR_SURF_P2) |
3429 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3431 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3432 PIPE_CONFIG(ADDR_SURF_P2) |
3433 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3435 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3436 PIPE_CONFIG(ADDR_SURF_P2) |
3437 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3439 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3440 PIPE_CONFIG(ADDR_SURF_P2) |
3441 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3443 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3444 PIPE_CONFIG(ADDR_SURF_P2) |
3445 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3447 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3448 PIPE_CONFIG(ADDR_SURF_P2) |
3449 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3450 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3451 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3452 PIPE_CONFIG(ADDR_SURF_P2) |
3453 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3455 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3456 PIPE_CONFIG(ADDR_SURF_P2) |
3457 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3459 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3460 PIPE_CONFIG(ADDR_SURF_P2) |
3461 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3463 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3464 PIPE_CONFIG(ADDR_SURF_P2) |
3465 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3467 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3468 PIPE_CONFIG(ADDR_SURF_P2) |
3469 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3470 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3471 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3472 PIPE_CONFIG(ADDR_SURF_P2) |
3473 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3475 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3476 PIPE_CONFIG(ADDR_SURF_P2) |
3477 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3478 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3479 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3480 PIPE_CONFIG(ADDR_SURF_P2) |
3481 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3483 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3484 PIPE_CONFIG(ADDR_SURF_P2) |
3485 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3487 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3488 PIPE_CONFIG(ADDR_SURF_P2) |
3489 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3492 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3493 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3494 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3495 NUM_BANKS(ADDR_SURF_8_BANK));
3496 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3499 NUM_BANKS(ADDR_SURF_8_BANK));
3500 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3501 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3502 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3503 NUM_BANKS(ADDR_SURF_8_BANK));
3504 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3505 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3506 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3507 NUM_BANKS(ADDR_SURF_8_BANK));
3508 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3511 NUM_BANKS(ADDR_SURF_8_BANK));
3512 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3513 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3514 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3515 NUM_BANKS(ADDR_SURF_8_BANK));
3516 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3517 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3518 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3519 NUM_BANKS(ADDR_SURF_8_BANK));
3520 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3523 NUM_BANKS(ADDR_SURF_16_BANK));
3524 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3525 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3526 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3527 NUM_BANKS(ADDR_SURF_16_BANK));
3528 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3531 NUM_BANKS(ADDR_SURF_16_BANK));
3532 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3535 NUM_BANKS(ADDR_SURF_16_BANK));
3536 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3537 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3538 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3539 NUM_BANKS(ADDR_SURF_16_BANK));
3540 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3541 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3542 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3543 NUM_BANKS(ADDR_SURF_16_BANK));
3544 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3547 NUM_BANKS(ADDR_SURF_8_BANK));
3549 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3550 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3551 reg_offset != 23)
3552 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3554 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3555 if (reg_offset != 7)
3556 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
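/*
 * The helper below steers register accesses to one shader engine (SE),
 * shader array (SH) or instance through GRBM_GFX_INDEX; passing 0xffffffff
 * for se_num, sh_num or instance sets the matching *_BROADCAST_WRITES bit
 * so the access hits all units at once.
 */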
3562 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3563 u32 se_num, u32 sh_num, u32 instance)
3564 {
3565 u32 data;
3567 if (instance == 0xffffffff)
3568 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3569 else
3570 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3572 if (se_num == 0xffffffff)
3573 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3574 else
3575 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3577 if (sh_num == 0xffffffff)
3578 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3579 else
3580 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3582 WREG32(mmGRBM_GFX_INDEX, data);
3585 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3587 return (u32)((1ULL << bit_width) - 1);
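/* Worked example for gfx_v8_0_create_bitmask(): bit_width = 4 yields (1ULL << 4) - 1 = 0xF. */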
3590 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3591 {
3592 u32 data, mask;
3594 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3595 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3597 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3599 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3600 adev->gfx.config.max_sh_per_se);
3602 return (~data) & mask;
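/*
 * Golden (unharvested) PA_SC_RASTER_CONFIG/_1 values per ASIC; when some
 * RBs are harvested, gfx_v8_0_write_harvested_raster_configs() below
 * derives per-SE variants from these defaults.
 */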
3605 static void
3606 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3607 {
3608 switch (adev->asic_type) {
3609 case CHIP_FIJI:
3610 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3611 RB_XSEL2(1) | PKR_MAP(2) |
3612 PKR_XSEL(1) | PKR_YSEL(1) |
3613 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3614 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3615 SE_PAIR_YSEL(2);
3616 break;
3617 case CHIP_TONGA:
3618 case CHIP_POLARIS10:
3619 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3620 SE_XSEL(1) | SE_YSEL(1);
3621 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3622 SE_PAIR_YSEL(2);
3623 break;
3624 case CHIP_CARRIZO:
3625 case CHIP_STONEY:
3626 *rconf |= RB_MAP_PKR0(2);
3627 *rconf1 |= 0x0;
3628 break;
3629 case CHIP_POLARIS11:
3630 case CHIP_POLARIS12:
3631 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3632 SE_XSEL(1) | SE_YSEL(1);
3633 *rconf1 |= 0x0;
3634 break;
3639 default:
3640 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3641 break;
3642 }
3645 static void
3646 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3647 u32 raster_config, u32 raster_config_1,
3648 unsigned rb_mask, unsigned num_rb)
3650 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3651 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3652 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3653 unsigned rb_per_se = num_rb / num_se;
3654 unsigned se_mask[4];
3655 unsigned se;
3657 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3658 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3659 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3660 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3662 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3663 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3664 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3666 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3667 (!se_mask[2] && !se_mask[3]))) {
3668 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3670 if (!se_mask[0] && !se_mask[1]) {
3671 raster_config_1 |=
3672 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3673 } else {
3674 raster_config_1 |=
3675 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3676 }
3677 }
3679 for (se = 0; se < num_se; se++) {
3680 unsigned raster_config_se = raster_config;
3681 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3682 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3683 int idx = (se / 2) * 2;
3685 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3686 raster_config_se &= ~SE_MAP_MASK;
3688 if (!se_mask[idx]) {
3689 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3690 } else {
3691 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3692 }
3693 }
3695 pkr0_mask &= rb_mask;
3696 pkr1_mask &= rb_mask;
3697 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3698 raster_config_se &= ~PKR_MAP_MASK;
3700 if (!pkr0_mask) {
3701 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3702 } else {
3703 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3704 }
3705 }
3707 if (rb_per_se >= 2) {
3708 unsigned rb0_mask = 1 << (se * rb_per_se);
3709 unsigned rb1_mask = rb0_mask << 1;
3711 rb0_mask &= rb_mask;
3712 rb1_mask &= rb_mask;
3713 if (!rb0_mask || !rb1_mask) {
3714 raster_config_se &= ~RB_MAP_PKR0_MASK;
3716 if (!rb0_mask) {
3717 raster_config_se |=
3718 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3719 } else {
3720 raster_config_se |=
3721 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3722 }
3723 }
3725 if (rb_per_se > 2) {
3726 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3727 rb1_mask = rb0_mask << 1;
3728 rb0_mask &= rb_mask;
3729 rb1_mask &= rb_mask;
3730 if (!rb0_mask || !rb1_mask) {
3731 raster_config_se &= ~RB_MAP_PKR1_MASK;
3733 if (!rb0_mask) {
3734 raster_config_se |=
3735 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3736 } else {
3737 raster_config_se |=
3738 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3739 }
3740 }
3741 }
3742 }
3744 /* GRBM_GFX_INDEX has a different offset on VI */
3745 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3746 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3747 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3750 /* GRBM_GFX_INDEX has a different offset on VI */
3751 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
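/*
 * gfx_v8_0_setup_rb() walks every SE/SH pair to build the bitmap of render
 * backends that survived harvesting. If nothing is harvested (or all RB
 * pipes are active) the golden raster config is written as-is; otherwise
 * the harvested per-SE variant is programmed, and the resulting registers
 * are cached for userspace queries.
 */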
3754 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3755 {
3756 int i, j;
3757 u32 data;
3758 u32 raster_config = 0, raster_config_1 = 0;
3759 u32 active_rbs = 0;
3760 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3761 adev->gfx.config.max_sh_per_se;
3762 unsigned num_rb_pipes;
3764 mutex_lock(&adev->grbm_idx_mutex);
3765 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3766 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3767 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3768 data = gfx_v8_0_get_rb_active_bitmap(adev);
3769 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3770 rb_bitmap_width_per_sh);
3773 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3775 adev->gfx.config.backend_enable_mask = active_rbs;
3776 adev->gfx.config.num_rbs = hweight32(active_rbs);
3778 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3779 adev->gfx.config.max_shader_engines, 16);
3781 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3783 if (!adev->gfx.config.backend_enable_mask ||
3784 adev->gfx.config.num_rbs >= num_rb_pipes) {
3785 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3786 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3787 } else {
3788 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3789 adev->gfx.config.backend_enable_mask,
3790 num_rb_pipes);
3791 }
3793 /* cache the values for userspace */
3794 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3795 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3796 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3797 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3798 RREG32(mmCC_RB_BACKEND_DISABLE);
3799 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3800 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3801 adev->gfx.config.rb_config[i][j].raster_config =
3802 RREG32(mmPA_SC_RASTER_CONFIG);
3803 adev->gfx.config.rb_config[i][j].raster_config_1 =
3804 RREG32(mmPA_SC_RASTER_CONFIG_1);
3807 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3808 mutex_unlock(&adev->grbm_idx_mutex);
3811 /**
3812 * gfx_v8_0_init_compute_vmid - init the compute vmids
3814 * @adev: amdgpu_device pointer
3816 * Initialize compute vmid sh_mem registers
3817 */
3819 #define DEFAULT_SH_MEM_BASES (0x6000)
3820 #define FIRST_COMPUTE_VMID (8)
3821 #define LAST_COMPUTE_VMID (16)
3822 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3823 {
3824 int i;
3825 uint32_t sh_mem_config;
3826 uint32_t sh_mem_bases;
3828 /*
3829 * Configure apertures:
3830 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3831 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3832 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3833 */
3834 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
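/*
 * With DEFAULT_SH_MEM_BASES = 0x6000 this evaluates to 0x60006000:
 * private base in bits 15:0 and shared base in bits 31:16, each supplying
 * the top 16 bits of the 64-bit apertures listed above
 * (0x6000 << 48 = 0x60000000'00000000).
 */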
3836 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3837 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3838 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3839 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3840 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3841 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3843 mutex_lock(&adev->srbm_mutex);
3844 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3845 vi_srbm_select(adev, 0, 0, 0, i);
3846 /* CP and shaders */
3847 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3848 WREG32(mmSH_MEM_APE1_BASE, 1);
3849 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3850 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3852 vi_srbm_select(adev, 0, 0, 0, 0);
3853 mutex_unlock(&adev->srbm_mutex);
3856 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3858 switch (adev->asic_type) {
3859 default:
3860 adev->gfx.config.double_offchip_lds_buf = 1;
3861 break;
3862 case CHIP_CARRIZO:
3863 case CHIP_STONEY:
3864 adev->gfx.config.double_offchip_lds_buf = 0;
3865 break;
3866 }
3869 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3870 {
3871 u32 tmp, sh_static_mem_cfg;
3872 int i;
3874 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3875 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3876 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3877 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3879 gfx_v8_0_tiling_mode_table_init(adev);
3880 gfx_v8_0_setup_rb(adev);
3881 gfx_v8_0_get_cu_info(adev);
3882 gfx_v8_0_config_init(adev);
3884 /* XXX SH_MEM regs */
3885 /* where to put LDS, scratch, GPUVM in FSA64 space */
3886 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3887 SWIZZLE_ENABLE, 1);
3888 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3889 ELEMENT_SIZE, 1);
3890 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3891 INDEX_STRIDE, 3);
3892 mutex_lock(&adev->srbm_mutex);
3893 for (i = 0; i < adev->vm_manager.num_ids; i++) {
3894 vi_srbm_select(adev, 0, 0, 0, i);
3895 /* CP and shaders */
3896 if (i == 0) {
3897 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3898 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3899 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3900 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3901 WREG32(mmSH_MEM_CONFIG, tmp);
3902 WREG32(mmSH_MEM_BASES, 0);
3903 } else {
3904 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3905 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3906 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3907 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3908 WREG32(mmSH_MEM_CONFIG, tmp);
3909 tmp = adev->mc.shared_aperture_start >> 48;
3910 WREG32(mmSH_MEM_BASES, tmp);
3911 }
3913 WREG32(mmSH_MEM_APE1_BASE, 1);
3914 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3915 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3917 vi_srbm_select(adev, 0, 0, 0, 0);
3918 mutex_unlock(&adev->srbm_mutex);
3920 gfx_v8_0_init_compute_vmid(adev);
3922 mutex_lock(&adev->grbm_idx_mutex);
3923 /*
3924 * making sure that the following register writes will be broadcast
3925 * to all the shaders
3926 */
3927 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3929 WREG32(mmPA_SC_FIFO_SIZE,
3930 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3931 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3932 (adev->gfx.config.sc_prim_fifo_size_backend <<
3933 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3934 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3935 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3936 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3937 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3939 tmp = RREG32(mmSPI_ARB_PRIORITY);
3940 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3941 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3942 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3943 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3944 WREG32(mmSPI_ARB_PRIORITY, tmp);
3946 mutex_unlock(&adev->grbm_idx_mutex);
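/*
 * RLC serdes idle wait: poll RLC_SERDES_CU_MASTER_BUSY on every SE/SH
 * combination first, then poll the non-CU masters (SE, GC, TC0, TC1)
 * through RLC_SERDES_NONCU_MASTER_BUSY, each bounded by usec_timeout.
 */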
3950 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3951 {
3952 u32 i, j, k;
3953 u32 mask;
3955 mutex_lock(&adev->grbm_idx_mutex);
3956 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3957 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3958 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3959 for (k = 0; k < adev->usec_timeout; k++) {
3960 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3961 break;
3962 udelay(1);
3963 }
3964 }
3965 }
3966 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3967 mutex_unlock(&adev->grbm_idx_mutex);
3969 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3970 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3971 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3972 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3973 for (k = 0; k < adev->usec_timeout; k++) {
3974 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3975 break;
3976 udelay(1);
3977 }
3978 }
3980 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3981 bool enable)
3982 {
3983 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3985 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3986 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3987 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3988 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3990 WREG32(mmCP_INT_CNTL_RING0, tmp);
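/*
 * The clear state indirect buffer address is split across
 * RLC_CSIB_ADDR_HI/LO (the low word is 4-byte aligned, hence the
 * 0xfffffffc mask), with the length programmed in RLC_CSIB_LENGTH.
 */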
3993 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3995 /* csib */
3996 WREG32(mmRLC_CSIB_ADDR_HI,
3997 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3998 WREG32(mmRLC_CSIB_ADDR_LO,
3999 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
4000 WREG32(mmRLC_CSIB_LENGTH,
4001 adev->gfx.rlc.clear_state_size);
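/*
 * The RLC register list format parsed below is a stream of register
 * offsets in which 0xFFFFFFFF marks the start of a new indirect block.
 * The parser records each block's starting offset, gathers the distinct
 * indexed register offsets into unique_indices[], and rewrites each list
 * entry to its slot index in that table.
 */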
4004 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
4005 int ind_offset,
4006 int list_size,
4007 int *unique_indices,
4008 int *indices_count,
4009 int max_indices,
4010 int *ind_start_offsets,
4011 int *offset_count,
4012 int max_offset)
4013 {
4014 int indices;
4015 bool new_entry = true;
4017 for (; ind_offset < list_size; ind_offset++) {
4019 if (new_entry) {
4020 new_entry = false;
4021 ind_start_offsets[*offset_count] = ind_offset;
4022 *offset_count = *offset_count + 1;
4023 BUG_ON(*offset_count >= max_offset);
4024 }
4026 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
4027 new_entry = true;
4028 continue;
4029 }
4031 ind_offset++;
4033 /* look for the matching index */
4034 for (indices = 0;
4035 indices < *indices_count;
4036 indices++) {
4037 if (unique_indices[indices] ==
4038 register_list_format[ind_offset])
4039 break;
4040 }
4042 if (indices >= *indices_count) {
4043 unique_indices[*indices_count] =
4044 register_list_format[ind_offset];
4045 indices = *indices_count;
4046 *indices_count = *indices_count + 1;
4047 BUG_ON(*indices_count >= max_indices);
4048 }
4050 register_list_format[ind_offset] = indices;
4051 }
4052 }
4054 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4055 {
4056 int i, temp, data;
4057 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4058 int indices_count = 0;
4059 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4060 int offset_count = 0;
4062 int list_size;
4063 unsigned int *register_list_format =
4064 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4065 if (!register_list_format)
4066 return -ENOMEM;
4067 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4068 adev->gfx.rlc.reg_list_format_size_bytes);
4070 gfx_v8_0_parse_ind_reg_list(register_list_format,
4071 RLC_FormatDirectRegListLength,
4072 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4073 unique_indices,
4074 &indices_count,
4075 sizeof(unique_indices) / sizeof(int),
4076 indirect_start_offsets,
4077 &offset_count,
4078 sizeof(indirect_start_offsets)/sizeof(int));
4080 /* save and restore list */
4081 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4083 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4084 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4085 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4087 /* indirect list */
4088 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4089 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4090 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4092 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4093 list_size = list_size >> 1;
4094 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4095 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4097 /* starting offsets of the indirect register blocks */
4098 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4099 adev->gfx.rlc.starting_offsets_start);
4100 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
4101 WREG32(mmRLC_GPM_SCRATCH_DATA,
4102 indirect_start_offsets[i]);
4104 /* unique indices */
4105 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4106 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4107 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
4108 if (unique_indices[i] != 0) {
4109 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4110 WREG32(data + i, unique_indices[i] >> 20);
4113 kfree(register_list_format);
4115 return 0;
4116 }
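/*
 * Once the direct (ARAM) restore list, the indirect list, its size and
 * starting offsets, and the index control registers are programmed, the
 * save restore machine (SRM) can simply be switched on.
 */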
4118 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4120 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4123 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4124 {
4125 uint32_t data;
4127 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4129 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4130 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4131 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4132 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4133 WREG32(mmRLC_PG_DELAY, data);
4135 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4136 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4140 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4143 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4146 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4149 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4152 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4154 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4157 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4159 if ((adev->asic_type == CHIP_CARRIZO) ||
4160 (adev->asic_type == CHIP_STONEY)) {
4161 gfx_v8_0_init_csb(adev);
4162 gfx_v8_0_init_save_restore_list(adev);
4163 gfx_v8_0_enable_save_restore_machine(adev);
4164 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4165 gfx_v8_0_init_power_gating(adev);
4166 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4167 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4168 (adev->asic_type == CHIP_POLARIS12)) {
4169 gfx_v8_0_init_csb(adev);
4170 gfx_v8_0_init_save_restore_list(adev);
4171 gfx_v8_0_enable_save_restore_machine(adev);
4172 gfx_v8_0_init_power_gating(adev);
4177 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4179 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4181 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4182 gfx_v8_0_wait_for_rlc_serdes(adev);
4185 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4187 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4188 udelay(50);
4190 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4191 udelay(50);
4192 }
4194 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4196 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4198 /* on APUs like carrizo, the cp interrupt is enabled only after the cp is initialized */
4199 if (!(adev->flags & AMD_IS_APU))
4200 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4202 udelay(50);
4203 }
4205 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4207 const struct rlc_firmware_header_v2_0 *hdr;
4208 const __le32 *fw_data;
4209 unsigned i, fw_size;
4211 if (!adev->gfx.rlc_fw)
4212 return -EINVAL;
4214 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4215 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4217 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4218 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4219 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4221 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4222 for (i = 0; i < fw_size; i++)
4223 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4224 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4226 return 0;
4227 }
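/*
 * Note that all VI microcode loads in this file follow the same pattern:
 * reset the ucode address register to 0, stream the ucode dwords through
 * the data register, then write the firmware version back to the address
 * register.
 */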
4229 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4230 {
4231 int r;
4232 u32 tmp;
4234 gfx_v8_0_rlc_stop(adev);
4236 /* disable CG */
4237 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4238 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4239 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4240 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4241 if (adev->asic_type == CHIP_POLARIS11 ||
4242 adev->asic_type == CHIP_POLARIS10 ||
4243 adev->asic_type == CHIP_POLARIS12) {
4244 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4245 tmp &= ~0x3;
4246 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4247 }
4249 /* disable PG */
4250 WREG32(mmRLC_PG_CNTL, 0);
4252 gfx_v8_0_rlc_reset(adev);
4253 gfx_v8_0_init_pg(adev);
4255 if (!adev->pp_enabled) {
4256 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4257 /* legacy rlc firmware loading */
4258 r = gfx_v8_0_rlc_load_microcode(adev);
4259 if (r)
4260 return r;
4261 } else {
4262 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4263 AMDGPU_UCODE_ID_RLC_G);
4264 if (r)
4265 return -EINVAL;
4266 }
4267 }
4269 gfx_v8_0_rlc_start(adev);
4271 return 0;
4272 }
4274 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4275 {
4276 int i;
4277 u32 tmp = RREG32(mmCP_ME_CNTL);
4279 if (enable) {
4280 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4281 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4282 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4283 } else {
4284 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4285 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4286 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4287 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4288 adev->gfx.gfx_ring[i].ready = false;
4289 }
4290 WREG32(mmCP_ME_CNTL, tmp);
4291 }
4294 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4296 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4297 const struct gfx_firmware_header_v1_0 *ce_hdr;
4298 const struct gfx_firmware_header_v1_0 *me_hdr;
4299 const __le32 *fw_data;
4300 unsigned i, fw_size;
4302 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4303 return -EINVAL;
4305 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4306 adev->gfx.pfp_fw->data;
4307 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4308 adev->gfx.ce_fw->data;
4309 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4310 adev->gfx.me_fw->data;
4312 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4313 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4314 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4316 gfx_v8_0_cp_gfx_enable(adev, false);
4318 /* PFP */
4319 fw_data = (const __le32 *)
4320 (adev->gfx.pfp_fw->data +
4321 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4322 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4323 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4324 for (i = 0; i < fw_size; i++)
4325 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4326 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4328 /* CE */
4329 fw_data = (const __le32 *)
4330 (adev->gfx.ce_fw->data +
4331 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4332 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4333 WREG32(mmCP_CE_UCODE_ADDR, 0);
4334 for (i = 0; i < fw_size; i++)
4335 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4336 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4338 /* ME */
4339 fw_data = (const __le32 *)
4340 (adev->gfx.me_fw->data +
4341 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4342 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4343 WREG32(mmCP_ME_RAM_WADDR, 0);
4344 for (i = 0; i < fw_size; i++)
4345 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4346 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4348 return 0;
4349 }
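/*
 * The clear state buffer size is counted in dwords: 2 for the preamble
 * begin packet, 3 for context control, 2 + reg_count per SECT_CONTEXT
 * extent, 4 for the PA_SC_RASTER_CONFIG pair, and 2 each for the preamble
 * end and clear state packets, matching what gfx_v8_0_cp_gfx_start() emits.
 */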
4351 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4352 {
4353 u32 count = 0;
4354 const struct cs_section_def *sect = NULL;
4355 const struct cs_extent_def *ext = NULL;
4357 /* begin clear state */
4358 count += 2;
4359 /* context control state */
4360 count += 3;
4362 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4363 for (ext = sect->section; ext->extent != NULL; ++ext) {
4364 if (sect->id == SECT_CONTEXT)
4365 count += 2 + ext->reg_count;
4366 else
4367 return 0;
4368 }
4369 }
4370 /* pa_sc_raster_config/pa_sc_raster_config1 */
4371 count += 4;
4372 /* end clear state */
4373 count += 2;
4374 /* clear state */
4375 count += 2;
4377 return count;
4378 }
4380 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4382 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4383 const struct cs_section_def *sect = NULL;
4384 const struct cs_extent_def *ext = NULL;
4385 int r, i;
4387 /* init the CP */
4388 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4389 WREG32(mmCP_ENDIAN_SWAP, 0);
4390 WREG32(mmCP_DEVICE_ID, 1);
4392 gfx_v8_0_cp_gfx_enable(adev, true);
4394 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4395 if (r) {
4396 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4397 return r;
4398 }
4400 /* clear state buffer */
4401 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4402 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4404 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4405 amdgpu_ring_write(ring, 0x80000000);
4406 amdgpu_ring_write(ring, 0x80000000);
4408 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4409 for (ext = sect->section; ext->extent != NULL; ++ext) {
4410 if (sect->id == SECT_CONTEXT) {
4411 amdgpu_ring_write(ring,
4412 PACKET3(PACKET3_SET_CONTEXT_REG,
4413 ext->reg_count));
4414 amdgpu_ring_write(ring,
4415 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4416 for (i = 0; i < ext->reg_count; i++)
4417 amdgpu_ring_write(ring, ext->extent[i]);
4422 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4423 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4424 switch (adev->asic_type) {
4425 case CHIP_TONGA:
4426 case CHIP_POLARIS10:
4427 amdgpu_ring_write(ring, 0x16000012);
4428 amdgpu_ring_write(ring, 0x0000002A);
4429 break;
4430 case CHIP_POLARIS11:
4431 case CHIP_POLARIS12:
4432 amdgpu_ring_write(ring, 0x16000012);
4433 amdgpu_ring_write(ring, 0x00000000);
4434 break;
4435 case CHIP_FIJI:
4436 amdgpu_ring_write(ring, 0x3a00161a);
4437 amdgpu_ring_write(ring, 0x0000002e);
4438 break;
4439 case CHIP_CARRIZO:
4440 amdgpu_ring_write(ring, 0x00000002);
4441 amdgpu_ring_write(ring, 0x00000000);
4442 break;
4443 case CHIP_TOPAZ:
4444 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4445 0x00000000 : 0x00000002);
4446 amdgpu_ring_write(ring, 0x00000000);
4447 break;
4448 case CHIP_STONEY:
4449 amdgpu_ring_write(ring, 0x00000000);
4450 amdgpu_ring_write(ring, 0x00000000);
4451 break;
4452 default:
4453 BUG();
4454 }
4456 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4457 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4459 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4460 amdgpu_ring_write(ring, 0);
4462 /* init the CE partitions */
4463 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4464 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4465 amdgpu_ring_write(ring, 0x8000);
4466 amdgpu_ring_write(ring, 0x8000);
4468 amdgpu_ring_commit(ring);
4470 return 0;
4471 }
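/*
 * gfx_v8_0_cp_gfx_resume() programs gfx ring 0: the buffer size as log2 of
 * 8-byte units, rptr/wptr writeback addresses inside the wb area, the ring
 * base address (256-byte aligned, hence the >> 8), and, on everything but
 * Topaz, the doorbell used to kick the ring.
 */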
4473 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4474 {
4475 struct amdgpu_ring *ring;
4476 u32 tmp;
4477 u32 rb_bufsz;
4478 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4479 int r;
4481 /* Set the write pointer delay */
4482 WREG32(mmCP_RB_WPTR_DELAY, 0);
4484 /* set the RB to use vmid 0 */
4485 WREG32(mmCP_RB_VMID, 0);
4487 /* Set ring buffer size */
4488 ring = &adev->gfx.gfx_ring[0];
4489 rb_bufsz = order_base_2(ring->ring_size / 8);
4490 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4491 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4492 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4493 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4494 #ifdef __BIG_ENDIAN
4495 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4496 #endif
4497 WREG32(mmCP_RB0_CNTL, tmp);
4499 /* Initialize the ring buffer's read and write pointers */
4500 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4501 ring->wptr = 0;
4502 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4504 /* set the wb address whether it's enabled or not */
4505 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4506 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4507 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4509 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4510 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4511 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4513 WREG32(mmCP_RB0_CNTL, tmp);
4515 rb_addr = ring->gpu_addr >> 8;
4516 WREG32(mmCP_RB0_BASE, rb_addr);
4517 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4519 /* no gfx doorbells on iceland */
4520 if (adev->asic_type != CHIP_TOPAZ) {
4521 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4522 if (ring->use_doorbell) {
4523 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4524 DOORBELL_OFFSET, ring->doorbell_index);
4525 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4526 DOORBELL_HIT, 0);
4527 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4528 DOORBELL_EN, 1);
4529 } else {
4530 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4531 DOORBELL_EN, 0);
4532 }
4533 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4535 if (adev->asic_type == CHIP_TONGA) {
4536 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4537 DOORBELL_RANGE_LOWER,
4538 AMDGPU_DOORBELL_GFX_RING0);
4539 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4541 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4542 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4547 /* start the ring */
4548 amdgpu_ring_clear_ring(ring);
4549 gfx_v8_0_cp_gfx_start(adev);
4550 ring->ready = true;
4551 r = amdgpu_ring_test_ring(ring);
4552 if (r)
4553 ring->ready = false;
4555 return r;
4556 }
4558 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4559 {
4560 int i;
4562 if (enable) {
4563 WREG32(mmCP_MEC_CNTL, 0);
4564 } else {
4565 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4566 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4567 adev->gfx.compute_ring[i].ready = false;
4568 adev->gfx.kiq.ring.ready = false;
4569 }
4570 udelay(50);
4571 }
4573 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4575 const struct gfx_firmware_header_v1_0 *mec_hdr;
4576 const __le32 *fw_data;
4577 unsigned i, fw_size;
4579 if (!adev->gfx.mec_fw)
4580 return -EINVAL;
4582 gfx_v8_0_cp_compute_enable(adev, false);
4584 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4585 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4587 fw_data = (const __le32 *)
4588 (adev->gfx.mec_fw->data +
4589 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4590 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4592 /* MEC1 */
4593 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4594 for (i = 0; i < fw_size; i++)
4595 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4596 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4598 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4599 if (adev->gfx.mec2_fw) {
4600 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4602 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4603 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4605 fw_data = (const __le32 *)
4606 (adev->gfx.mec2_fw->data +
4607 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4608 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4610 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4611 for (i = 0; i < fw_size; i++)
4612 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4613 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4614 }
4616 return 0;
4617 }
4619 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4620 {
4621 int i, r;
4623 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4624 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4626 if (ring->mqd_obj) {
4627 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4628 if (unlikely(r != 0))
4629 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4631 amdgpu_bo_unpin(ring->mqd_obj);
4632 amdgpu_bo_unreserve(ring->mqd_obj);
4634 amdgpu_bo_unref(&ring->mqd_obj);
4635 ring->mqd_obj = NULL;
4636 ring->mqd_ptr = NULL;
4637 ring->mqd_gpu_addr = 0;
4643 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4644 {
4645 uint32_t tmp;
4646 struct amdgpu_device *adev = ring->adev;
4648 /* tell RLC which queue is the KIQ */
4649 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4650 tmp &= 0xffffff00;
4651 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4652 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4653 tmp |= 0x80;
4654 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4657 static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
4659 amdgpu_ring_alloc(ring, 8);
4661 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4662 amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4663 amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */
4664 amdgpu_ring_write(ring, 0); /* queue mask hi */
4665 amdgpu_ring_write(ring, 0); /* gws mask lo */
4666 amdgpu_ring_write(ring, 0); /* gws mask hi */
4667 amdgpu_ring_write(ring, 0); /* oac mask */
4668 amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */
4669 amdgpu_ring_commit(ring);
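/*
 * Queues are attached through the KIQ with a MAP_QUEUES packet carrying
 * the selection dword (decoded in the comment below), the doorbell offset,
 * queue/pipe/me routing, the MQD address and the wptr poll address.
 */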
4673 static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
4674 struct amdgpu_ring *ring)
4676 struct amdgpu_device *adev = kiq_ring->adev;
4677 uint64_t mqd_addr, wptr_addr;
4679 mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4680 wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4681 amdgpu_ring_alloc(kiq_ring, 8);
4683 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4684 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4685 amdgpu_ring_write(kiq_ring, 0x21010000);
4686 amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
4687 (ring->queue << 26) |
4688 (ring->pipe << 29) |
4689 ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
4690 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4691 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4692 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4693 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4694 amdgpu_ring_commit(kiq_ring);
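/*
 * The MQD (memory queue descriptor) is the in-memory image of one compute
 * queue's HQD register state; gfx_v8_0_mqd_init() fills it in, and it is
 * loaded into hardware either via the KIQ or, for the KIQ's own ring,
 * directly by gfx_v8_0_kiq_init_register() below.
 */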
4698 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4700 struct amdgpu_device *adev = ring->adev;
4701 struct vi_mqd *mqd = ring->mqd_ptr;
4702 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4703 uint32_t tmp;
4705 mqd->header = 0xC0310800;
4706 mqd->compute_pipelinestat_enable = 0x00000001;
4707 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4708 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4709 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4710 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4711 mqd->compute_misc_reserved = 0x00000003;
4713 eop_base_addr = ring->eop_gpu_addr >> 8;
4714 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4715 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4717 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4718 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4719 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4720 (order_base_2(MEC_HPD_SIZE / 4) - 1));
4722 mqd->cp_hqd_eop_control = tmp;
4724 /* enable doorbell? */
4725 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4726 CP_HQD_PQ_DOORBELL_CONTROL,
4727 DOORBELL_EN,
4728 ring->use_doorbell ? 1 : 0);
4730 mqd->cp_hqd_pq_doorbell_control = tmp;
4732 /* disable the queue if it's active */
4733 mqd->cp_hqd_dequeue_request = 0;
4734 mqd->cp_hqd_pq_rptr = 0;
4735 mqd->cp_hqd_pq_wptr = 0;
4737 /* set the pointer to the MQD */
4738 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4739 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4741 /* set MQD vmid to 0 */
4742 tmp = RREG32(mmCP_MQD_CONTROL);
4743 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4744 mqd->cp_mqd_control = tmp;
4746 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4747 hqd_gpu_addr = ring->gpu_addr >> 8;
4748 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4749 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4751 /* set up the HQD, this is similar to CP_RB0_CNTL */
4752 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4753 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4754 (order_base_2(ring->ring_size / 4) - 1));
4755 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4756 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4757 #ifdef __BIG_ENDIAN
4758 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4759 #endif
4760 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4761 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4762 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4763 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4764 mqd->cp_hqd_pq_control = tmp;
4766 /* set the wb address whether it's enabled or not */
4767 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4768 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4769 mqd->cp_hqd_pq_rptr_report_addr_hi =
4770 upper_32_bits(wb_gpu_addr) & 0xffff;
4772 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4773 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4774 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4775 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4778 /* enable the doorbell if requested */
4779 if (ring->use_doorbell) {
4780 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4781 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4782 DOORBELL_OFFSET, ring->doorbell_index);
4784 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4786 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4787 DOORBELL_SOURCE, 0);
4788 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4792 mqd->cp_hqd_pq_doorbell_control = tmp;
4794 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4796 mqd->cp_hqd_pq_wptr = ring->wptr;
4797 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4799 /* set the vmid for the queue */
4800 mqd->cp_hqd_vmid = 0;
4802 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4803 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4804 mqd->cp_hqd_persistent_state = tmp;
4806 /* activate the queue */
4807 mqd->cp_hqd_active = 1;
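
/* Illustrative expansion of the REG_SET_FIELD() idiom used throughout
 * this function (a sketch of the semantics, not the authoritative
 * macro): clear the field with its mask, then OR in the shifted value.
 */
static inline uint32_t gfx_v8_0_example_set_field(uint32_t reg_val, uint32_t mask,
						  uint32_t shift, uint32_t field_val)
{
	return (reg_val & ~mask) | ((field_val << shift) & mask);
}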
4812 static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
4814 struct amdgpu_device *adev = ring->adev;
4815 struct vi_mqd *mqd = ring->mqd_ptr;
4818 /* disable wptr polling */
4819 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4821 WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
4822 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
4824 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4825 WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
4827 /* enable doorbell? */
4828 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
4830 /* disable the queue if it's active */
4831 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4832 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4833 for (j = 0; j < adev->usec_timeout; j++) {
4834 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4838 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4839 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4840 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4843 /* set the pointer to the MQD */
4844 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4845 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4847 /* set MQD vmid to 0 */
4848 WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
4850 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4851 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4852 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4854 /* set up the HQD, this is similar to CP_RB0_CNTL */
4855 WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
4857 /* set the wb address whether it's enabled or not */
4858 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4859 mqd->cp_hqd_pq_rptr_report_addr_lo);
4860 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4861 mqd->cp_hqd_pq_rptr_report_addr_hi);
4863 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4864 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
4865 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
4867 /* enable the doorbell if requested */
4868 if (ring->use_doorbell) {
4869 if ((adev->asic_type == CHIP_CARRIZO) ||
4870 (adev->asic_type == CHIP_FIJI) ||
4871 (adev->asic_type == CHIP_STONEY)) {
4872 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4873 AMDGPU_DOORBELL_KIQ << 2);
4874 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4875 AMDGPU_DOORBELL_MEC_RING7 << 2);
4878 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
4880 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4881 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4883 /* set the vmid for the queue */
4884 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4886 WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
4888 /* activate the queue */
4889 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4891 if (ring->use_doorbell)
4892 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
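
/* Illustrative sketch: the dequeue-and-wait idiom above, factored into
 * a helper. Not part of the driver; assumes the RREG32()/usec_timeout
 * pattern used throughout this file and that udelay() is available.
 */
static inline bool gfx_v8_0_example_wait_hqd_idle(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
			return true;	/* queue went inactive */
		udelay(1);
	}
	return false;	/* timed out */
}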
4897 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4899 struct amdgpu_device *adev = ring->adev;
4900 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4901 struct vi_mqd *mqd = ring->mqd_ptr;
4902 bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
4903 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4906 gfx_v8_0_kiq_setting(&kiq->ring);
4908 mqd_idx = ring - &adev->gfx.compute_ring[0];
4911 if (!adev->gfx.in_reset) {
4912 memset((void *)mqd, 0, sizeof(*mqd));
4913 mutex_lock(&adev->srbm_mutex);
4914 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4915 gfx_v8_0_mqd_init(ring);
4917 gfx_v8_0_kiq_init_register(ring);
4918 vi_srbm_select(adev, 0, 0, 0, 0);
4919 mutex_unlock(&adev->srbm_mutex);
4921 if (adev->gfx.mec.mqd_backup[mqd_idx])
4922 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4923 } else { /* for GPU_RESET case */
4924 /* reset MQD to a clean status */
4925 if (adev->gfx.mec.mqd_backup[mqd_idx])
4926 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4928 /* reset ring buffer */
4930 amdgpu_ring_clear_ring(ring);
4933 mutex_lock(&adev->srbm_mutex);
4934 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4935 gfx_v8_0_kiq_init_register(ring);
4936 vi_srbm_select(adev, 0, 0, 0, 0);
4937 mutex_unlock(&adev->srbm_mutex);
4942 gfx_v8_0_kiq_enable(ring);
4944 gfx_v8_0_map_queue_enable(&kiq->ring, ring);
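
/* Note on the two paths above (a reading aid, not new behavior): on
 * first init the MQD is built from scratch under SRBM select and then
 * snapshotted into mqd_backup[]; on the GPU-reset path the snapshot is
 * copied back, the ring buffer is cleared, and only the per-queue
 * registers are reprogrammed from the restored MQD.
 */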
4949 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4951 struct amdgpu_ring *ring = NULL;
4954 gfx_v8_0_cp_compute_enable(adev, true);
4956 ring = &adev->gfx.kiq.ring;
4958 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4959 if (unlikely(r != 0))
4962 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4964 r = gfx_v8_0_kiq_init_queue(ring);
4965 amdgpu_bo_kunmap(ring->mqd_obj);
4966 ring->mqd_ptr = NULL;
4968 amdgpu_bo_unreserve(ring->mqd_obj);
4973 r = amdgpu_ring_test_ring(ring);
4975 ring->ready = false;
4979 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4980 ring = &adev->gfx.compute_ring[i];
4982 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4983 if (unlikely(r != 0))
4985 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4987 r = gfx_v8_0_kiq_init_queue(ring);
4988 amdgpu_bo_kunmap(ring->mqd_obj);
4989 ring->mqd_ptr = NULL;
4991 amdgpu_bo_unreserve(ring->mqd_obj);
4996 r = amdgpu_ring_test_ring(ring);
4998 ring->ready = false;
5005 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
5009 bool use_doorbell = true;
5017 /* init the queues. */
5018 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5019 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5021 if (ring->mqd_obj == NULL) {
5022 r = amdgpu_bo_create(adev,
5023 sizeof(struct vi_mqd),
5025 AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
5026 NULL, &ring->mqd_obj);
5028 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
5033 r = amdgpu_bo_reserve(ring->mqd_obj, false);
5034 if (unlikely(r != 0)) {
5035 gfx_v8_0_cp_compute_fini(adev);
5038 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
5041 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
5042 gfx_v8_0_cp_compute_fini(adev);
5045 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
5047 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
5048 gfx_v8_0_cp_compute_fini(adev);
5052 /* init the mqd struct */
5053 memset(buf, 0, sizeof(struct vi_mqd));
5055 mqd = (struct vi_mqd *)buf;
5056 mqd->header = 0xC0310800;
5057 mqd->compute_pipelinestat_enable = 0x00000001;
5058 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
5059 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
5060 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
5061 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
5062 mqd->compute_misc_reserved = 0x00000003;
5064 mutex_lock(&adev->srbm_mutex);
5065 vi_srbm_select(adev, ring->me,
5069 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
5072 /* write the EOP addr */
5073 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
5074 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
5076 /* set the VMID assigned */
5077 WREG32(mmCP_HQD_VMID, 0);
5079 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
5080 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
5081 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
5082 (order_base_2(MEC_HPD_SIZE / 4) - 1));
5083 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
5085 /* disable wptr polling */
5086 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
5087 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
5088 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
5090 mqd->cp_hqd_eop_base_addr_lo =
5091 RREG32(mmCP_HQD_EOP_BASE_ADDR);
5092 mqd->cp_hqd_eop_base_addr_hi =
5093 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
5095 /* enable doorbell? */
5096 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
5098 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
5100 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
5102 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
5103 mqd->cp_hqd_pq_doorbell_control = tmp;
5105 /* disable the queue if it's active */
5106 mqd->cp_hqd_dequeue_request = 0;
5107 mqd->cp_hqd_pq_rptr = 0;
5108 mqd->cp_hqd_pq_wptr = 0;
5109 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
5110 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
5111 for (j = 0; j < adev->usec_timeout; j++) {
5112 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
5116 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
5117 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
5118 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
5121 /* set the pointer to the MQD */
5122 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
5123 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5124 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
5125 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
5127 /* set MQD vmid to 0 */
5128 tmp = RREG32(mmCP_MQD_CONTROL);
5129 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
5130 WREG32(mmCP_MQD_CONTROL, tmp);
5131 mqd->cp_mqd_control = tmp;
5133 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5134 hqd_gpu_addr = ring->gpu_addr >> 8;
5135 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
5136 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5137 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
5138 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
5140 /* set up the HQD, this is similar to CP_RB0_CNTL */
5141 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
5142 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
5143 (order_base_2(ring->ring_size / 4) - 1));
5144 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
5145 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
5147 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
5149 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
5150 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
5151 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
5152 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
5153 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
5154 mqd->cp_hqd_pq_control = tmp;
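
/* Worked example of the QUEUE_SIZE encoding above (illustrative,
 * assuming a 4096-byte ring and that the field decodes like EOP_SIZE,
 * i.e. 2^(n+1) dwords): 4096 bytes = 1024 dwords,
 * order_base_2(1024) - 1 = 9, and 2^(9+1) = 1024 dwords = 4 KB.
 */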
5156 /* set the wb address whether it's enabled or not */
5157 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
5158 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
5159 mqd->cp_hqd_pq_rptr_report_addr_hi =
5160 upper_32_bits(wb_gpu_addr) & 0xffff;
5161 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
5162 mqd->cp_hqd_pq_rptr_report_addr_lo);
5163 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5164 mqd->cp_hqd_pq_rptr_report_addr_hi);
5166 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
5167 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
5168 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
5169 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5170 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
5171 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
5172 mqd->cp_hqd_pq_wptr_poll_addr_hi);
5174 /* enable the doorbell if requested */
5176 if ((adev->asic_type == CHIP_CARRIZO) ||
5177 (adev->asic_type == CHIP_FIJI) ||
5178 (adev->asic_type == CHIP_STONEY) ||
5179 (adev->asic_type == CHIP_POLARIS11) ||
5180 (adev->asic_type == CHIP_POLARIS10) ||
5181 (adev->asic_type == CHIP_POLARIS12)) {
5182 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
5183 AMDGPU_DOORBELL_KIQ << 2);
5184 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
5185 AMDGPU_DOORBELL_MEC_RING7 << 2);
5187 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
5188 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
5189 DOORBELL_OFFSET, ring->doorbell_index);
5190 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
5191 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
5192 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
5193 mqd->cp_hqd_pq_doorbell_control = tmp;
5196 mqd->cp_hqd_pq_doorbell_control = 0;
5198 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
5199 mqd->cp_hqd_pq_doorbell_control);
5201 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5203 mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
5204 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
5205 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
5207 /* set the vmid for the queue */
5208 mqd->cp_hqd_vmid = 0;
5209 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
5211 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
5212 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
5213 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
5214 mqd->cp_hqd_persistent_state = tmp;
5215 if (adev->asic_type == CHIP_STONEY ||
5216 adev->asic_type == CHIP_POLARIS11 ||
5217 adev->asic_type == CHIP_POLARIS10 ||
5218 adev->asic_type == CHIP_POLARIS12) {
5219 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
5220 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
5221 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
5224 /* activate the queue */
5225 mqd->cp_hqd_active = 1;
5226 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
5228 vi_srbm_select(adev, 0, 0, 0, 0);
5229 mutex_unlock(&adev->srbm_mutex);
5231 amdgpu_bo_kunmap(ring->mqd_obj);
5232 amdgpu_bo_unreserve(ring->mqd_obj);
5236 tmp = RREG32(mmCP_PQ_STATUS);
5237 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
5238 WREG32(mmCP_PQ_STATUS, tmp);
5241 gfx_v8_0_cp_compute_enable(adev, true);
5243 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5244 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5247 r = amdgpu_ring_test_ring(ring);
5249 ring->ready = false;
5255 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5259 if (!(adev->flags & AMD_IS_APU))
5260 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5262 if (!adev->pp_enabled) {
5263 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
5264 /* legacy firmware loading */
5265 r = gfx_v8_0_cp_gfx_load_microcode(adev);
5269 r = gfx_v8_0_cp_compute_load_microcode(adev);
5273 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5274 AMDGPU_UCODE_ID_CP_CE);
5278 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5279 AMDGPU_UCODE_ID_CP_PFP);
5283 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5284 AMDGPU_UCODE_ID_CP_ME);
5288 if (adev->asic_type == CHIP_TOPAZ) {
5289 r = gfx_v8_0_cp_compute_load_microcode(adev);
5293 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5294 AMDGPU_UCODE_ID_CP_MEC1);
5301 r = gfx_v8_0_cp_gfx_resume(adev);
5305 if (amdgpu_sriov_vf(adev))
5306 r = gfx_v8_0_kiq_resume(adev);
5308 r = gfx_v8_0_cp_compute_resume(adev);
5312 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5317 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5319 gfx_v8_0_cp_gfx_enable(adev, enable);
5320 gfx_v8_0_cp_compute_enable(adev, enable);
5323 static int gfx_v8_0_hw_init(void *handle)
5326 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5328 gfx_v8_0_init_golden_registers(adev);
5329 gfx_v8_0_gpu_init(adev);
5331 r = gfx_v8_0_rlc_resume(adev);
5335 r = gfx_v8_0_cp_resume(adev);
5340 static int gfx_v8_0_hw_fini(void *handle)
5342 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5344 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5345 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5346 if (amdgpu_sriov_vf(adev)) {
5347 pr_debug("For SRIOV client, shouldn't do anything.\n");
5350 gfx_v8_0_cp_enable(adev, false);
5351 gfx_v8_0_rlc_stop(adev);
5352 gfx_v8_0_cp_compute_fini(adev);
5354 amdgpu_set_powergating_state(adev,
5355 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5360 static int gfx_v8_0_suspend(void *handle)
5362 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5364 return gfx_v8_0_hw_fini(adev);
5367 static int gfx_v8_0_resume(void *handle)
5369 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5371 return gfx_v8_0_hw_init(adev);
5374 static bool gfx_v8_0_is_idle(void *handle)
5376 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5378 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5384 static int gfx_v8_0_wait_for_idle(void *handle)
5387 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5389 for (i = 0; i < adev->usec_timeout; i++) {
5390 if (gfx_v8_0_is_idle(handle))
5398 static bool gfx_v8_0_check_soft_reset(void *handle)
5400 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5401 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5405 tmp = RREG32(mmGRBM_STATUS);
5406 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5407 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5408 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5409 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5410 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5411 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5412 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5413 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5414 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5415 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5416 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5417 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5418 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5422 tmp = RREG32(mmGRBM_STATUS2);
5423 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5424 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5425 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5427 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5428 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5429 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5430 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5432 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5434 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5436 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5437 SOFT_RESET_GRBM, 1);
5441 tmp = RREG32(mmSRBM_STATUS);
5442 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5443 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5444 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5445 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5446 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5447 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5449 if (grbm_soft_reset || srbm_soft_reset) {
5450 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5451 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5454 adev->gfx.grbm_soft_reset = 0;
5455 adev->gfx.srbm_soft_reset = 0;
5460 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5461 struct amdgpu_ring *ring)
5465 mutex_lock(&adev->srbm_mutex);
5466 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5467 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
5468 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2);
5469 for (i = 0; i < adev->usec_timeout; i++) {
5470 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
5475 vi_srbm_select(adev, 0, 0, 0, 0);
5476 mutex_unlock(&adev->srbm_mutex);
5479 static int gfx_v8_0_pre_soft_reset(void *handle)
5481 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5482 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5484 if ((!adev->gfx.grbm_soft_reset) &&
5485 (!adev->gfx.srbm_soft_reset))
5488 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5489 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5492 gfx_v8_0_rlc_stop(adev);
5494 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5495 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5496 /* Disable GFX parsing/prefetching */
5497 gfx_v8_0_cp_gfx_enable(adev, false);
5499 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5500 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5501 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5502 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5505 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5506 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5508 gfx_v8_0_inactive_hqd(adev, ring);
5510 /* Disable MEC parsing/prefetching */
5511 gfx_v8_0_cp_compute_enable(adev, false);
5517 static int gfx_v8_0_soft_reset(void *handle)
5519 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5520 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5523 if ((!adev->gfx.grbm_soft_reset) &&
5524 (!adev->gfx.srbm_soft_reset))
5527 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5528 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5530 if (grbm_soft_reset || srbm_soft_reset) {
5531 tmp = RREG32(mmGMCON_DEBUG);
5532 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5533 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5534 WREG32(mmGMCON_DEBUG, tmp);
5538 if (grbm_soft_reset) {
5539 tmp = RREG32(mmGRBM_SOFT_RESET);
5540 tmp |= grbm_soft_reset;
5541 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5542 WREG32(mmGRBM_SOFT_RESET, tmp);
5543 tmp = RREG32(mmGRBM_SOFT_RESET);
5547 tmp &= ~grbm_soft_reset;
5548 WREG32(mmGRBM_SOFT_RESET, tmp);
5549 tmp = RREG32(mmGRBM_SOFT_RESET);
5552 if (srbm_soft_reset) {
5553 tmp = RREG32(mmSRBM_SOFT_RESET);
5554 tmp |= srbm_soft_reset;
5555 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5556 WREG32(mmSRBM_SOFT_RESET, tmp);
5557 tmp = RREG32(mmSRBM_SOFT_RESET);
5561 tmp &= ~srbm_soft_reset;
5562 WREG32(mmSRBM_SOFT_RESET, tmp);
5563 tmp = RREG32(mmSRBM_SOFT_RESET);
5566 if (grbm_soft_reset || srbm_soft_reset) {
5567 tmp = RREG32(mmGMCON_DEBUG);
5568 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5569 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5570 WREG32(mmGMCON_DEBUG, tmp);
5573 /* Wait a little for things to settle down */
5579 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5580 struct amdgpu_ring *ring)
5582 mutex_lock(&adev->srbm_mutex);
5583 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5584 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5585 WREG32(mmCP_HQD_PQ_RPTR, 0);
5586 WREG32(mmCP_HQD_PQ_WPTR, 0);
5587 vi_srbm_select(adev, 0, 0, 0, 0);
5588 mutex_unlock(&adev->srbm_mutex);
5591 static int gfx_v8_0_post_soft_reset(void *handle)
5593 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5594 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5596 if ((!adev->gfx.grbm_soft_reset) &&
5597 (!adev->gfx.srbm_soft_reset))
5600 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5601 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5603 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5604 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5605 gfx_v8_0_cp_gfx_resume(adev);
5607 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5608 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5609 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5610 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5613 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5614 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5616 gfx_v8_0_init_hqd(adev, ring);
5618 gfx_v8_0_cp_compute_resume(adev);
5620 gfx_v8_0_rlc_start(adev);
5626 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5628 * @adev: amdgpu_device pointer
5630 * Fetches a GPU clock counter snapshot.
5631 * Returns the 64-bit clock counter snapshot.
5633 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5637 mutex_lock(&adev->gfx.gpu_clock_mutex);
5638 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5639 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5640 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5641 mutex_unlock(&adev->gfx.gpu_clock_mutex);
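
/* Example use (illustrative, not part of the driver): measuring an
 * interval in GPU clock ticks by sampling the counter twice.
 */
static inline uint64_t gfx_v8_0_example_clock_delta(struct amdgpu_device *adev)
{
	uint64_t start = gfx_v8_0_get_gpu_clock_counter(adev);

	/* ... workload under measurement would run here ... */
	return gfx_v8_0_get_gpu_clock_counter(adev) - start;
}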
5645 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5647 uint32_t gds_base, uint32_t gds_size,
5648 uint32_t gws_base, uint32_t gws_size,
5649 uint32_t oa_base, uint32_t oa_size)
5651 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5652 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5654 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5655 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5657 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5658 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5661 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5662 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5663 WRITE_DATA_DST_SEL(0)));
5664 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5665 amdgpu_ring_write(ring, 0);
5666 amdgpu_ring_write(ring, gds_base);
5669 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5670 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5671 WRITE_DATA_DST_SEL(0)));
5672 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5673 amdgpu_ring_write(ring, 0);
5674 amdgpu_ring_write(ring, gds_size);
5677 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5678 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5679 WRITE_DATA_DST_SEL(0)));
5680 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5681 amdgpu_ring_write(ring, 0);
5682 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5685 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5686 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5687 WRITE_DATA_DST_SEL(0)));
5688 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5689 amdgpu_ring_write(ring, 0);
5690 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
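
/* Worked example of the OA mask expression above (illustrative): with
 * oa_base = 4 and oa_size = 2, (1 << 6) - (1 << 4) = 0x30, i.e. two
 * consecutive mask bits starting at bit 4.
 */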
5693 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5695 WREG32(mmSQ_IND_INDEX,
5696 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5697 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5698 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5699 (SQ_IND_INDEX__FORCE_READ_MASK));
5700 return RREG32(mmSQ_IND_DATA);
5703 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5704 uint32_t wave, uint32_t thread,
5705 uint32_t regno, uint32_t num, uint32_t *out)
5707 WREG32(mmSQ_IND_INDEX,
5708 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5709 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5710 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5711 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5712 (SQ_IND_INDEX__FORCE_READ_MASK) |
5713 (SQ_IND_INDEX__AUTO_INCR_MASK));
5714 while (num--)
5715 *(out++) = RREG32(mmSQ_IND_DATA);
5718 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5720 /* type 0 wave data */
5721 dst[(*no_fields)++] = 0;
5722 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5723 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5724 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5725 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5726 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5727 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5728 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5729 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5730 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5731 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5732 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5733 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5734 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5735 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5736 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5737 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5738 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5739 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
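
/* Illustrative usage (not part of the driver): dumping one wave's
 * state into a local buffer. The 19-entry bound matches the writes in
 * gfx_v8_0_read_wave_data() above (the leading type field plus 18
 * register reads).
 */
static inline void gfx_v8_0_example_dump_wave(struct amdgpu_device *adev,
					      uint32_t simd, uint32_t wave)
{
	uint32_t fields[19];
	int n = 0;

	gfx_v8_0_read_wave_data(adev, simd, wave, fields, &n);
	/* fields[0..n-1] now holds the type-0 wave dump */
}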
5742 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5743 uint32_t wave, uint32_t start,
5744 uint32_t size, uint32_t *dst)
5747 adev, simd, wave, 0,
5748 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5752 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5753 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5754 .select_se_sh = &gfx_v8_0_select_se_sh,
5755 .read_wave_data = &gfx_v8_0_read_wave_data,
5756 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5759 static int gfx_v8_0_early_init(void *handle)
5761 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5763 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5764 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5765 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5766 gfx_v8_0_set_ring_funcs(adev);
5767 gfx_v8_0_set_irq_funcs(adev);
5768 gfx_v8_0_set_gds_init(adev);
5769 gfx_v8_0_set_rlc_funcs(adev);
5774 static int gfx_v8_0_late_init(void *handle)
5776 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5779 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5783 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5787 /* requires IBs so do in late init after IB pool is initialized */
5788 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5792 amdgpu_set_powergating_state(adev,
5793 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5798 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5801 if ((adev->asic_type == CHIP_POLARIS11) ||
5802 (adev->asic_type == CHIP_POLARIS12))
5803 /* Send msg to SMU via Powerplay */
5804 amdgpu_set_powergating_state(adev,
5805 AMD_IP_BLOCK_TYPE_SMC,
5807 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5809 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5812 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5815 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5818 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5821 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5824 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5827 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5830 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5833 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5835 /* Read any GFX register to wake up GFX. */
5837 RREG32(mmDB_RENDER_CONTROL);
5840 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5843 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5844 cz_enable_gfx_cg_power_gating(adev, true);
5845 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5846 cz_enable_gfx_pipeline_power_gating(adev, true);
5848 cz_enable_gfx_cg_power_gating(adev, false);
5849 cz_enable_gfx_pipeline_power_gating(adev, false);
5853 static int gfx_v8_0_set_powergating_state(void *handle,
5854 enum amd_powergating_state state)
5856 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5857 bool enable = (state == AMD_PG_STATE_GATE);
5859 if (amdgpu_sriov_vf(adev))
5862 switch (adev->asic_type) {
5866 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5867 cz_enable_sck_slow_down_on_power_up(adev, true);
5868 cz_enable_sck_slow_down_on_power_down(adev, true);
5870 cz_enable_sck_slow_down_on_power_up(adev, false);
5871 cz_enable_sck_slow_down_on_power_down(adev, false);
5873 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5874 cz_enable_cp_power_gating(adev, true);
5876 cz_enable_cp_power_gating(adev, false);
5878 cz_update_gfx_cg_power_gating(adev, enable);
5880 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5881 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5883 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5885 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5886 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5888 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5890 case CHIP_POLARIS11:
5891 case CHIP_POLARIS12:
5892 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5893 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5895 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5897 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5898 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5900 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5902 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5903 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5905 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5914 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5916 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5919 if (amdgpu_sriov_vf(adev))
5922 /* AMD_CG_SUPPORT_GFX_MGCG */
5923 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5924 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5925 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5927 /* AMD_CG_SUPPORT_GFX_CGCG */
5928 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5929 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5930 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5932 /* AMD_CG_SUPPORT_GFX_CGLS */
5933 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5934 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5936 /* AMD_CG_SUPPORT_GFX_CGTS */
5937 data = RREG32(mmCGTS_SM_CTRL_REG);
5938 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5939 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5941 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5942 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5943 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5945 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5946 data = RREG32(mmRLC_MEM_SLP_CNTL);
5947 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5948 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5950 /* AMD_CG_SUPPORT_GFX_CP_LS */
5951 data = RREG32(mmCP_MEM_SLP_CNTL);
5952 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5953 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5956 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5957 uint32_t reg_addr, uint32_t cmd)
5961 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5963 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5964 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5966 data = RREG32(mmRLC_SERDES_WR_CTRL);
5967 if (adev->asic_type == CHIP_STONEY)
5968 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5969 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5970 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5971 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5972 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5973 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5974 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5975 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5976 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5978 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5979 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5980 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5981 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5982 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5983 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5984 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5985 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5986 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5987 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5988 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5989 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5990 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5991 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5992 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5994 WREG32(mmRLC_SERDES_WR_CTRL, data);
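
/* Example invocation (illustrative): broadcast a "clear MGCG override"
 * BPM command to all CUs, as the clock-gating update paths later in
 * this file do.
 */
static inline void gfx_v8_0_example_clear_mgcg_override(struct amdgpu_device *adev)
{
	gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
}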
5997 #define MSG_ENTER_RLC_SAFE_MODE 1
5998 #define MSG_EXIT_RLC_SAFE_MODE 0
5999 #define RLC_GPR_REG2__REQ_MASK 0x00000001
6000 #define RLC_GPR_REG2__REQ__SHIFT 0
6001 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
6002 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
6004 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
6009 data = RREG32(mmRLC_CNTL);
6010 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
6013 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
6014 data |= RLC_SAFE_MODE__CMD_MASK;
6015 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
6016 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
6017 WREG32(mmRLC_SAFE_MODE, data);
6019 for (i = 0; i < adev->usec_timeout; i++) {
6020 if ((RREG32(mmRLC_GPM_STAT) &
6021 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
6022 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
6023 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
6024 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
6029 for (i = 0; i < adev->usec_timeout; i++) {
6030 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
6034 adev->gfx.rlc.in_safe_mode = true;
6038 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
6043 data = RREG32(mmRLC_CNTL);
6044 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
6047 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
6048 if (adev->gfx.rlc.in_safe_mode) {
6049 data |= RLC_SAFE_MODE__CMD_MASK;
6050 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
6051 WREG32(mmRLC_SAFE_MODE, data);
6052 adev->gfx.rlc.in_safe_mode = false;
6056 for (i = 0; i < adev->usec_timeout; i++) {
6057 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
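
/* Note on the handshake above (a reading aid): entering safe mode
 * writes CMD with MESSAGE = 1, waits for RLC_GPM_STAT to report both
 * GFX clock and power status, then waits for the CMD bit to self-clear
 * as the acknowledge; exiting mirrors this with the message cleared.
 */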
6063 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
6064 .enter_safe_mode = iceland_enter_rlc_safe_mode,
6065 .exit_safe_mode = iceland_exit_rlc_safe_mode
6068 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
6071 uint32_t temp, data;
6073 adev->gfx.rlc.funcs->enter_safe_mode(adev);
6075 /* It is disabled by HW by default */
6076 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
6077 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6078 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
6079 /* 1 - RLC memory Light sleep */
6080 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
6082 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
6083 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
6086 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
6087 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6088 if (adev->flags & AMD_IS_APU)
6089 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6090 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6091 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
6093 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6094 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6095 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
6096 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6099 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
6101 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6102 gfx_v8_0_wait_for_rlc_serdes(adev);
6104 /* 5 - clear mgcg override */
6105 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6107 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
6108 /* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */
6109 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
6110 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
6111 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
6112 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
6113 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
6114 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
6115 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
6116 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
6117 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
6118 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
6120 WREG32(mmCGTS_SM_CTRL_REG, data);
6124 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6125 gfx_v8_0_wait_for_rlc_serdes(adev);
6127 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
6128 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6129 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6130 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6131 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
6132 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6134 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
6136 /* 2 - disable MGLS in RLC */
6137 data = RREG32(mmRLC_MEM_SLP_CNTL);
6138 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
6139 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
6140 WREG32(mmRLC_MEM_SLP_CNTL, data);
6143 /* 3 - disable MGLS in CP */
6144 data = RREG32(mmCP_MEM_SLP_CNTL);
6145 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
6146 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
6147 WREG32(mmCP_MEM_SLP_CNTL, data);
6150 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
6151 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
6152 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
6153 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
6155 WREG32(mmCGTS_SM_CTRL_REG, data);
6157 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6158 gfx_v8_0_wait_for_rlc_serdes(adev);
6160 /* 6 - set mgcg override */
6161 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6165 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6166 gfx_v8_0_wait_for_rlc_serdes(adev);
6169 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6172 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
6175 uint32_t temp, temp1, data, data1;
6177 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
6179 adev->gfx.rlc.funcs->enter_safe_mode(adev);
6181 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
6182 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6183 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
6185 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6187 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6188 gfx_v8_0_wait_for_rlc_serdes(adev);
6190 /* 2 - clear cgcg override */
6191 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6193 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6194 gfx_v8_0_wait_for_rlc_serdes(adev);
6196 /* 3 - write cmd to set CGLS */
6197 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
6199 /* 4 - enable cgcg */
6200 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
6202 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6204 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6206 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6207 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6210 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6212 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6216 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6218 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
6219 * Cmp_busy/GFX_Idle interrupts
6221 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6223 /* disable cntx_empty_int_enable & GFX Idle interrupt */
6224 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
6227 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6228 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
6229 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
6231 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6233 /* read gfx register to wake up cgcg */
6234 RREG32(mmCB_CGTT_SCLK_CTRL);
6235 RREG32(mmCB_CGTT_SCLK_CTRL);
6236 RREG32(mmCB_CGTT_SCLK_CTRL);
6237 RREG32(mmCB_CGTT_SCLK_CTRL);
6239 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6240 gfx_v8_0_wait_for_rlc_serdes(adev);
6242 /* write cmd to set CGCG override */
6243 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6245 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6246 gfx_v8_0_wait_for_rlc_serdes(adev);
6248 /* write cmd to Clear CGLS */
6249 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6251 /* disable cgcg, cgls should be disabled too. */
6252 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
6253 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6255 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6258 gfx_v8_0_wait_for_rlc_serdes(adev);
6260 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6262 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6266 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6267 * === MGCG + MGLS + TS(CG/LS) ===
6269 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6270 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6272 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6273 * === CGCG + CGLS ===
6275 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6276 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6281 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6282 enum amd_clockgating_state state)
6284 uint32_t msg_id, pp_state = 0;
6285 uint32_t pp_support_state = 0;
6286 void *pp_handle = adev->powerplay.pp_handle;
6288 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6289 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6290 pp_support_state = PP_STATE_SUPPORT_LS;
6291 pp_state = PP_STATE_LS;
6293 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6294 pp_support_state |= PP_STATE_SUPPORT_CG;
6295 pp_state |= PP_STATE_CG;
6297 if (state == AMD_CG_STATE_UNGATE)
6300 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6304 amd_set_clockgating_by_smu(pp_handle, msg_id);
6307 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6308 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6309 pp_support_state = PP_STATE_SUPPORT_LS;
6310 pp_state = PP_STATE_LS;
6313 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6314 pp_support_state |= PP_STATE_SUPPORT_CG;
6315 pp_state |= PP_STATE_CG;
6318 if (state == AMD_CG_STATE_UNGATE)
6321 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6325 amd_set_clockgating_by_smu(pp_handle, msg_id);
6331 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6332 enum amd_clockgating_state state)
6335 uint32_t msg_id, pp_state = 0;
6336 uint32_t pp_support_state = 0;
6337 void *pp_handle = adev->powerplay.pp_handle;
6339 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6340 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6341 pp_support_state = PP_STATE_SUPPORT_LS;
6342 pp_state = PP_STATE_LS;
6344 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6345 pp_support_state |= PP_STATE_SUPPORT_CG;
6346 pp_state |= PP_STATE_CG;
6348 if (state == AMD_CG_STATE_UNGATE)
6351 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6355 amd_set_clockgating_by_smu(pp_handle, msg_id);
6358 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6359 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6360 pp_support_state = PP_STATE_SUPPORT_LS;
6361 pp_state = PP_STATE_LS;
6363 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6364 pp_support_state |= PP_STATE_SUPPORT_CG;
6365 pp_state |= PP_STATE_CG;
6367 if (state == AMD_CG_STATE_UNGATE)
6370 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6374 amd_set_clockgating_by_smu(pp_handle, msg_id);
6377 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6378 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6379 pp_support_state = PP_STATE_SUPPORT_LS;
6380 pp_state = PP_STATE_LS;
6383 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6384 pp_support_state |= PP_STATE_SUPPORT_CG;
6385 pp_state |= PP_STATE_CG;
6388 if (state == AMD_CG_STATE_UNGATE)
6391 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6395 amd_set_clockgating_by_smu(pp_handle, msg_id);
6398 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6399 pp_support_state = PP_STATE_SUPPORT_LS;
6401 if (state == AMD_CG_STATE_UNGATE)
6404 pp_state = PP_STATE_LS;
6406 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6410 amd_set_clockgating_by_smu(pp_handle, msg_id);
6413 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6414 pp_support_state = PP_STATE_SUPPORT_LS;
6416 if (state == AMD_CG_STATE_UNGATE)
6419 pp_state = PP_STATE_LS;
6420 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6424 amd_set_clockgating_by_smu(pp_handle, msg_id);
6430 static int gfx_v8_0_set_clockgating_state(void *handle,
6431 enum amd_clockgating_state state)
6433 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6435 if (amdgpu_sriov_vf(adev))
6438 switch (adev->asic_type) {
6442 gfx_v8_0_update_gfx_clock_gating(adev,
6443 state == AMD_CG_STATE_GATE);
6446 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6448 case CHIP_POLARIS10:
6449 case CHIP_POLARIS11:
6450 case CHIP_POLARIS12:
6451 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6459 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6461 return ring->adev->wb.wb[ring->rptr_offs];
6464 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6466 struct amdgpu_device *adev = ring->adev;
6468 if (ring->use_doorbell)
6469 /* XXX check if swapping is necessary on BE */
6470 return ring->adev->wb.wb[ring->wptr_offs];
6472 return RREG32(mmCP_RB0_WPTR);
6475 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6477 struct amdgpu_device *adev = ring->adev;
6479 if (ring->use_doorbell) {
6480 /* XXX check if swapping is necessary on BE */
6481 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6482 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6484 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6485 (void)RREG32(mmCP_RB0_WPTR);
6489 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6491 u32 ref_and_mask, reg_mem_engine;
6493 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6494 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6497 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6500 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6507 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6508 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6511 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6512 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6513 WAIT_REG_MEM_FUNCTION(3) | /* == */
6515 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6516 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6517 amdgpu_ring_write(ring, ref_and_mask);
6518 amdgpu_ring_write(ring, ref_and_mask);
6519 amdgpu_ring_write(ring, 0x20); /* poll interval */
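
/* Note on the packet above (a reading aid): with OPERATION(1) the CP
 * first writes ref_and_mask to GPU_HDP_FLUSH_REQ, then polls
 * GPU_HDP_FLUSH_DONE until (value & mask) == ref, i.e. until the HDP
 * flush for this ring's bit has completed; 0x20 is the poll interval.
 */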
6522 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6524 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6525 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6528 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6529 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6534 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6536 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6537 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6538 WRITE_DATA_DST_SEL(0) |
6540 amdgpu_ring_write(ring, mmHDP_DEBUG0);
6541 amdgpu_ring_write(ring, 0);
6542 amdgpu_ring_write(ring, 1);
6546 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6547 struct amdgpu_ib *ib,
6548 unsigned vm_id, bool ctx_switch)
6550 u32 header, control = 0;
6552 if (ib->flags & AMDGPU_IB_FLAG_CE)
6553 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6555 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6557 control |= ib->length_dw | (vm_id << 24);
6559 if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT)
6560 control |= INDIRECT_BUFFER_PRE_ENB(1);
6562 amdgpu_ring_write(ring, header);
6563 amdgpu_ring_write(ring,
6567 (ib->gpu_addr & 0xFFFFFFFC));
6568 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6569 amdgpu_ring_write(ring, control);
6572 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6573 struct amdgpu_ib *ib,
6574 unsigned vm_id, bool ctx_switch)
6576 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6578 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6579 amdgpu_ring_write(ring,
6583 (ib->gpu_addr & 0xFFFFFFFC));
6584 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6585 amdgpu_ring_write(ring, control);
6588 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6589 u64 seq, unsigned flags)
6591 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6592 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6594 /* EVENT_WRITE_EOP - flush caches, send int */
6595 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6596 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6598 EOP_TC_WB_ACTION_EN |
6599 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6601 amdgpu_ring_write(ring, addr & 0xfffffffc);
6602 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6603 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6604 amdgpu_ring_write(ring, lower_32_bits(seq));
6605 amdgpu_ring_write(ring, upper_32_bits(seq));
6609 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6611 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6612 uint32_t seq = ring->fence_drv.sync_seq;
6613 uint64_t addr = ring->fence_drv.gpu_addr;
6615 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6616 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6617 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6618 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6619 amdgpu_ring_write(ring, addr & 0xfffffffc);
6620 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6621 amdgpu_ring_write(ring, seq);
6622 amdgpu_ring_write(ring, 0xffffffff);
6623 amdgpu_ring_write(ring, 4); /* poll interval */
6626 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6627 unsigned vm_id, uint64_t pd_addr)
6629 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6631 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6632 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6633 WRITE_DATA_DST_SEL(0)) |
6636 amdgpu_ring_write(ring,
6637 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6639 amdgpu_ring_write(ring,
6640 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6642 amdgpu_ring_write(ring, 0);
6643 amdgpu_ring_write(ring, pd_addr >> 12);
6645 /* bits 0-15 are the VM contexts0-15 */
6646 /* invalidate the cache */
6647 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6648 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6649 WRITE_DATA_DST_SEL(0)));
6650 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6651 amdgpu_ring_write(ring, 0);
6652 amdgpu_ring_write(ring, 1 << vm_id);
6654 /* wait for the invalidate to complete */
6655 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6656 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6657 WAIT_REG_MEM_FUNCTION(0) | /* always */
6658 WAIT_REG_MEM_ENGINE(0))); /* me */
6659 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6660 amdgpu_ring_write(ring, 0);
6661 amdgpu_ring_write(ring, 0); /* ref */
6662 amdgpu_ring_write(ring, 0); /* mask */
6663 amdgpu_ring_write(ring, 0x20); /* poll interval */
6665 /* compute doesn't have PFP */
6667 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6668 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6669 amdgpu_ring_write(ring, 0x0);
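
/* Worked example of the register selection above (illustrative): VM
 * contexts 0-7 and 8-15 live in two separate register banks, so
 * vm_id = 3 writes mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + 3 while
 * vm_id = 10 writes mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + 2.
 */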
6673 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6675 return ring->adev->wb.wb[ring->wptr_offs];
6678 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6680 struct amdgpu_device *adev = ring->adev;
6682 /* XXX check if swapping is necessary on BE */
6683 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6684 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6687 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6691 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6692 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6694 /* RELEASE_MEM - flush caches, send int */
6695 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6696 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6698 EOP_TC_WB_ACTION_EN |
6699 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6701 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6702 amdgpu_ring_write(ring, addr & 0xfffffffc);
6703 amdgpu_ring_write(ring, upper_32_bits(addr));
6704 amdgpu_ring_write(ring, lower_32_bits(seq));
6705 amdgpu_ring_write(ring, upper_32_bits(seq));
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

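/*
 * Emit a CONTEXT_CONTROL packet. dw2 selects which state blocks the CP
 * reloads: the global/per-context state on a real context switch, plus the
 * CE RAM whenever a preamble IB is present. Under SR-IOV the CE/DE metadata
 * used for world switch is written immediately around the packet.
 */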
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta_init(ring,
			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble is
		 * presented, although no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_de_meta_init(ring,
			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
}

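/*
 * COND_EXEC patching: init_cond_exec emits a COND_EXEC packet whose DW count
 * is a dummy value (0x55aa55aa) and returns the ring offset of that slot;
 * once the size of the conditional block is known, patch_cond_exec rewrites
 * the slot with the real count, handling ring-buffer wrap-around.
 */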
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

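/*
 * Register access helpers for the KIQ ring: a read is a COPY_DATA from the
 * register into the VF writeback slot (virt.reg_val_offs), a write is a
 * plain WRITE_DATA to the register.
 */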
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
	 */
	if (me == 1) {
		switch (pipe) {
		case 0:
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

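/*
 * EOP interrupt handler. The IH ring_id encodes the source queue as
 * me (bits 3:2), pipe (bits 1:0) and queue (bits 6:4); the fence is then
 * processed on the matching gfx or compute ring.
 */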
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI. The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

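/*
 * Ring function tables: one per ring type (gfx, compute, KIQ), wiring the
 * emit helpers above into the ring core. emit_frame_size is the worst-case
 * number of DWs a single frame can add to the ring.
 */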
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
			the first COND_EXEC jump to the place just
			prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

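/*
 * GDS memory, GWS and OA partition sizes are derived from the total GDS
 * size reported by mmGDS_VMID0_SIZE.
 */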
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

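/*
 * CU accounting: on top of the harvested CUs reported by the hardware, CUs
 * can be disabled per SH via the amdgpu.disable_cu module option. The active
 * bitmap is what remains after both the CC_GC and GC_USER inactive masks are
 * applied; the first two active CUs of each SH are recorded as "always on".
 */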
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < 16; k++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

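/*
 * Under SR-IOV the CE/DE metadata used for world switch is staged in the
 * client state area (CSA). The payload layout depends on whether the
 * firmware supports chained IBs, so both variants are handled.
 */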
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	static union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}

static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
{
	uint64_t de_payload_addr, gds_addr;
	int cnt_de;
	static union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}

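/*
 * MQD (memory queue descriptor) management: one GTT BO holds the descriptor
 * the CP reads each queue's state from, and a CPU-side backup allows the
 * MQD to be restored after reset/suspend. The KIQ backup is stored at index
 * AMDGPU_MAX_COMPUTE_RINGS, after the compute-ring backups.
 */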
/* create MQD for each compute queue */
static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r, i;

	/* create MQD for KIQ */
	ring = &adev->gfx.kiq.ring;
	if (!ring->mqd_obj) {
		r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
		if (r) {
			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
			return r;
		}

		/* prepare MQD backup */
		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
			dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
	}

	/* create MQD for each KCQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (!ring->mqd_obj) {
			r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
			if (r) {
				dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
				return r;
			}

			/* prepare MQD backup */
			adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
			if (!adev->gfx.mec.mqd_backup[i])
				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
		}
	}

	return 0;
}

static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int i;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		kfree(adev->gfx.mec.mqd_backup[i]);
		amdgpu_bo_free_kernel(&ring->mqd_obj,
				      &ring->mqd_gpu_addr,
				      &ring->mqd_ptr);
	}

	ring = &adev->gfx.kiq.ring;
	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
	amdgpu_bo_free_kernel(&ring->mqd_obj,
			      &ring->mqd_gpu_addr,
			      &ring->mqd_ptr);
}