/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"
#include "smu/smu_7_1_3_d.h"
#include "smu/smu_7_1_3_sh_mask.h"
#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	4096

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)			((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)			((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)			((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)		((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)			((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)			((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)			((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)		((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)			((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address*/
enum bpm_reg {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
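
/*
 * The "golden" tables below are {register, AND mask, OR value} triples.
 * amdgpu_device_program_register_sequence() walks them three dwords at a
 * time: when the mask is 0xffffffff the value is written directly,
 * otherwise the register is read, the masked bits cleared, and the value
 * ORed in before writing back.
 */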
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
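
/*
 * Human-readable strings for the SQ_EDC_INFO SOURCE field, used when
 * logging where an SQ ECC error was detected.
 */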
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L
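
/*
 * Program the per-ASIC "golden" register settings: the MGCG/CGCG
 * clock-gating defaults, the per-chip gfx tuning values, and the common
 * config (raster config, GB_ADDR_CONFIG, SPI CU reservations).  Polaris10
 * additionally reprograms the ACLK divider and, for three specific board
 * SKUs, issues I2C writes as a board-specific workaround.
 */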
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	uint32_t data;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		data = RREG32_SMC(ixCG_ACLK_CNTL);
		data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
		data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
		WREG32_SMC(ixCG_ACLK_CNTL, data);
		if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
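
/*
 * Expose the eight SCRATCH_REG* registers through the generic
 * adev->gfx.scratch allocator; the ring test below borrows one of them.
 */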
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
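
/*
 * Basic GFX ring smoke test: seed a scratch register with 0xCAFEDEAD,
 * push a SET_UCONFIG_REG packet that stores 0xDEADBEEF there, and poll
 * until the value lands or usec_timeout expires.
 */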
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
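
/*
 * Indirect-buffer test: build a small IB containing a WRITE_DATA packet
 * that targets a writeback slot, submit it with a fence, and check that
 * 0xDEADBEEF shows up in memory once the fence signals.
 */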
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16,
			  AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
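
/* Drop all CP/RLC firmware references taken by gfx_v8_0_init_microcode(). */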
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
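
/*
 * Fetch and validate the PFP, ME, CE, RLC and MEC microcode images for
 * the detected ASIC.  Polaris parts first try the "_2" firmware names
 * and fall back to the original files; the RLC header additionally
 * carries the register save/restore lists that are unpacked here.
 */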
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);

	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs is
	 * formally released on feature version #46
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
	info->fw = adev->gfx.pfp_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
	info->fw = adev->gfx.me_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
	info->fw = adev->gfx.ce_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
	info->fw = adev->gfx.rlc_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
	info->fw = adev->gfx.mec_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	/* we need account JT in */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

	if (amdgpu_sriov_vf(adev)) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
	}

	if (adev->gfx.mec2_fw) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
		info->fw = adev->gfx.mec2_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
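
/*
 * Assemble the clear-state indirect buffer (CSB): PREAMBLE and
 * CONTEXT_CONTROL packets, the SECT_CONTEXT register extents from the VI
 * clear-state tables, the harvested raster config, and a trailing
 * CLEAR_STATE packet.
 */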
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_CARRIZO)
		return 5;
	else
		return 4;
}
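
/*
 * Set up RLC state: the clear-state buffer, the CP jump table plus GDS
 * backup area on Carrizo/Stoney, and seed the SPM VMID with 0xf.
 */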
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
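
/*
 * Allocate a VRAM buffer (one GFX8_MEC_HPD_SIZE chunk per acquired
 * compute ring) backing the HPD/EOP areas of the MEC hardware queues.
 */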
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}
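
/*
 * Hand-assembled GCN compute shaders for the EDC workaround below: the
 * first writes a pattern across a block of VGPRs, the second across
 * SGPRs, each terminated by s_barrier (0xbf8a0000) and s_endpgm
 * (0xbf810000).
 */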
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
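
/*
 * Carrizo ECC workaround: run throw-away compute dispatches that write
 * every VGPR and SGPR bank so the GPR ECC state is initialized, then
 * enable EDC (DED mode / FED propagation) and read the SEC/DED counter
 * registers back to clear them.
 */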
1528 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1530 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1531 struct amdgpu_ib ib;
1532 struct dma_fence *f = NULL;
1535 unsigned total_size, vgpr_offset, sgpr_offset;
1538 /* only supported on CZ */
1539 if (adev->asic_type != CHIP_CARRIZO)
1542 /* bail if the compute ring is not ready */
1543 if (!ring->sched.ready)
1546 tmp = RREG32(mmGB_EDC_MODE);
1547 WREG32(mmGB_EDC_MODE, 0);
1550 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1552 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1554 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1555 total_size = ALIGN(total_size, 256);
1556 vgpr_offset = total_size;
1557 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1558 sgpr_offset = total_size;
1559 total_size += sizeof(sgpr_init_compute_shader);
1561 /* allocate an indirect buffer to put the commands in */
1562 memset(&ib, 0, sizeof(ib));
1563 r = amdgpu_ib_get(adev, NULL, total_size,
1564 AMDGPU_IB_POOL_DIRECT, &ib);
1566 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1570 /* load the compute shaders */
1571 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1572 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1574 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1575 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1577 /* init the ib length to 0 */
1581 /* write the register state for the compute dispatch */
1582 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1583 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1584 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1585 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1587 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1588 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1589 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1590 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1591 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1592 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1594 /* write dispatch packet */
1595 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1596 ib.ptr[ib.length_dw++] = 8; /* x */
1597 ib.ptr[ib.length_dw++] = 1; /* y */
1598 ib.ptr[ib.length_dw++] = 1; /* z */
1599 ib.ptr[ib.length_dw++] =
1600 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1602 /* write CS partial flush packet */
1603 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1604 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
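	/*
	 * EVENT_TYPE(7) is CS_PARTIAL_FLUSH: the CP waits for the
	 * outstanding compute work to drain before it processes the
	 * register writes for the next dispatch.
	 */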
1607 /* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
1613 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1614 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1615 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1616 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1617 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1618 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1620 /* write dispatch packet */
1621 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1622 ib.ptr[ib.length_dw++] = 8; /* x */
1623 ib.ptr[ib.length_dw++] = 1; /* y */
1624 ib.ptr[ib.length_dw++] = 1; /* z */
1625 ib.ptr[ib.length_dw++] =
1626 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1628 /* write CS partial flush packet */
1629 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1630 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1633 /* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
1639 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1640 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1641 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1642 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1643 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1644 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1646 /* write dispatch packet */
1647 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1648 ib.ptr[ib.length_dw++] = 8; /* x */
1649 ib.ptr[ib.length_dw++] = 1; /* y */
1650 ib.ptr[ib.length_dw++] = 1; /* z */
1651 ib.ptr[ib.length_dw++] =
1652 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1654 /* write CS partial flush packet */
1655 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1656 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}
	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}
1672 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1673 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1674 WREG32(mmGB_EDC_MODE, tmp);
1676 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1677 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1678 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1681 /* read back registers to clear the counters */
1682 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1683 RREG32(sec_ded_counter_registers[i]);
fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
1702 adev->gfx.config.max_shader_engines = 1;
1703 adev->gfx.config.max_tile_pipes = 2;
1704 adev->gfx.config.max_cu_per_sh = 6;
1705 adev->gfx.config.max_sh_per_se = 1;
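		/*
		 * For reference, the total CU count is the product
		 * max_shader_engines * max_sh_per_se * max_cu_per_sh,
		 * i.e. 1 * 1 * 6 = 6 CUs here.
		 */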
1706 adev->gfx.config.max_backends_per_se = 2;
1707 adev->gfx.config.max_texture_channel_caches = 2;
1708 adev->gfx.config.max_gprs = 256;
1709 adev->gfx.config.max_gs_threads = 32;
1710 adev->gfx.config.max_hw_contexts = 8;
1712 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1713 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1714 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1715 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
1719 adev->gfx.config.max_shader_engines = 4;
1720 adev->gfx.config.max_tile_pipes = 16;
1721 adev->gfx.config.max_cu_per_sh = 16;
1722 adev->gfx.config.max_sh_per_se = 1;
1723 adev->gfx.config.max_backends_per_se = 4;
1724 adev->gfx.config.max_texture_channel_caches = 16;
1725 adev->gfx.config.max_gprs = 256;
1726 adev->gfx.config.max_gs_threads = 32;
1727 adev->gfx.config.max_hw_contexts = 8;
1729 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1730 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1731 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1732 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
1735 case CHIP_POLARIS11:
1736 case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
1740 adev->gfx.config.max_gprs = 256;
1741 adev->gfx.config.max_gs_threads = 32;
1742 adev->gfx.config.max_hw_contexts = 8;
1744 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1745 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1746 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1747 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
	case CHIP_VEGAM:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
1755 adev->gfx.config.max_gprs = 256;
1756 adev->gfx.config.max_gs_threads = 32;
1757 adev->gfx.config.max_hw_contexts = 8;
1759 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1760 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1761 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1762 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
1766 adev->gfx.config.max_shader_engines = 4;
1767 adev->gfx.config.max_tile_pipes = 8;
1768 adev->gfx.config.max_cu_per_sh = 8;
1769 adev->gfx.config.max_sh_per_se = 1;
1770 adev->gfx.config.max_backends_per_se = 2;
1771 adev->gfx.config.max_texture_channel_caches = 8;
1772 adev->gfx.config.max_gprs = 256;
1773 adev->gfx.config.max_gs_threads = 32;
1774 adev->gfx.config.max_hw_contexts = 8;
1776 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1777 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1778 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1779 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
1783 adev->gfx.config.max_shader_engines = 1;
1784 adev->gfx.config.max_tile_pipes = 2;
1785 adev->gfx.config.max_sh_per_se = 1;
1786 adev->gfx.config.max_backends_per_se = 2;
1787 adev->gfx.config.max_cu_per_sh = 8;
1788 adev->gfx.config.max_texture_channel_caches = 2;
1789 adev->gfx.config.max_gprs = 256;
1790 adev->gfx.config.max_gs_threads = 32;
1791 adev->gfx.config.max_hw_contexts = 8;
1793 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1794 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1795 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1796 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
1800 adev->gfx.config.max_shader_engines = 1;
1801 adev->gfx.config.max_tile_pipes = 2;
1802 adev->gfx.config.max_sh_per_se = 1;
1803 adev->gfx.config.max_backends_per_se = 1;
1804 adev->gfx.config.max_cu_per_sh = 3;
1805 adev->gfx.config.max_texture_channel_caches = 2;
1806 adev->gfx.config.max_gprs = 256;
1807 adev->gfx.config.max_gs_threads = 16;
1808 adev->gfx.config.max_hw_contexts = 8;
1810 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1811 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1812 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1813 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
1817 adev->gfx.config.max_shader_engines = 2;
1818 adev->gfx.config.max_tile_pipes = 4;
1819 adev->gfx.config.max_cu_per_sh = 2;
1820 adev->gfx.config.max_sh_per_se = 1;
1821 adev->gfx.config.max_backends_per_se = 2;
1822 adev->gfx.config.max_texture_channel_caches = 4;
1823 adev->gfx.config.max_gprs = 256;
1824 adev->gfx.config.max_gs_threads = 32;
1825 adev->gfx.config.max_hw_contexts = 8;
1827 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1828 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1829 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1830 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}
1835 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1836 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1838 adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1839 MC_ARB_RAMCFG, NOOFBANK);
1840 adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1841 MC_ARB_RAMCFG, NOOFRANKS);
1843 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1844 adev->gfx.config.mem_max_burst_length_bytes = 256;
1845 if (adev->flags & AMD_IS_APU) {
1846 /* Get memory bank mapping mode. */
1847 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1848 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1849 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1851 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1852 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1853 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
		/* Validate settings in case only one DIMM is installed. */
1856 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1857 dimm00_addr_map = 0;
1858 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1859 dimm01_addr_map = 0;
1860 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1861 dimm10_addr_map = 0;
1862 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1863 dimm11_addr_map = 0;
		/* If the DIMM address map is 8GB, the row size should be
		 * 2KB; otherwise it is 1KB.
		 */
		/* If row size(DIMM1) != row size(DIMM0), the row size
		 * should be the larger of the two.
		 */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}
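	/*
	 * Worked example for the dGPU row-size formula above: with a
	 * 4-byte column access and 2^(8 + NOOFCOLS) columns per row,
	 * NOOFCOLS = 0 gives 1KB, 1 gives 2KB and 2 gives 4KB; larger
	 * values are clamped to 4KB.
	 */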
1878 adev->gfx.config.shader_engine_tile_size = 32;
1879 adev->gfx.config.num_gpus = 1;
1880 adev->gfx.config.multi_gpu_tile_size = 64;
	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
	unsigned int hw_prio;

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;
1915 ring->ring_obj = NULL;
1916 ring->use_doorbell = true;
1917 ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1918 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1919 + (ring_id * GFX8_MEC_HPD_SIZE);
1920 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
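	/* e.g. queue 0 on pipe 0 of MEC0 is named "comp_1.0.0", since
	 * mec0 is exposed as me1
	 */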
	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;
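	/*
	 * Illustrative mapping: me 1 (MEC0), pipe 2 yields
	 * AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP; the EOP interrupt
	 * sources are laid out pipe-major starting at MEC1 pipe 0.
	 */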
1926 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ?
1927 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
1928 /* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type, hw_prio);
	if (r)
		return r;

	return 0;
}
1938 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
static int gfx_v8_0_sw_init(void *handle)
{
1942 int i, j, k, r, ring_id;
1943 struct amdgpu_ring *ring;
1944 struct amdgpu_kiq *kiq;
1945 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1947 switch (adev->asic_type) {
1951 case CHIP_POLARIS10:
1952 case CHIP_POLARIS11:
1953 case CHIP_POLARIS12:
		adev->gfx.mec.num_mec = 2;
		break;
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
1965 adev->gfx.mec.num_queue_per_pipe = 8;
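	/*
	 * Worst case this describes: 2 MECs * 4 pipes * 8 queues = 64
	 * hardware queues (half that on single-MEC parts), from which
	 * the compute rings below are carved out.
	 */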
	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
	if (r)
		return r;
1972 /* Privileged reg */
1973 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;
1978 /* Privileged inst */
1979 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;
1984 /* Add CP EDC/ECC irq */
1985 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;
1990 /* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}
1998 INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2000 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2002 gfx_v8_0_scratch_init(adev);
	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = adev->gfx.rlc.funcs->init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}
2022 /* set up the gfx ring */
2023 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2024 ring = &adev->gfx.gfx_ring[i];
2025 ring->ring_obj = NULL;
2026 sprintf(ring->name, "gfx");
2027 /* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
				     AMDGPU_RING_PRIO_DEFAULT);
		if (r)
			return r;
	}
	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}
	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;
2071 /* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000; /* 32KB of constant engine RAM */

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
static int gfx_v8_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;
2090 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2091 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2092 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2093 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2095 amdgpu_gfx_mqd_sw_fini(adev);
2096 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2097 amdgpu_gfx_kiq_fini(adev);
2099 gfx_v8_0_mec_fini(adev);
2100 amdgpu_gfx_rlc_fini(adev);
2101 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2102 &adev->gfx.rlc.clear_state_gpu_addr,
2103 (void **)&adev->gfx.rlc.cs_ptr);
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				      &adev->gfx.rlc.cp_table_gpu_addr,
				      (void **)&adev->gfx.rlc.cp_table_ptr);
	}

	gfx_v8_0_free_microcode(adev);

	return 0;
}
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
2123 mod2array = adev->gfx.config.macrotile_mode_array;
2125 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2126 modearray[reg_offset] = 0;
2128 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2129 mod2array[reg_offset] = 0;
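
	/*
	 * Every GB_TILE_MODEn/GB_MACROTILE_MODEn value below is
	 * composed with the shift macros defined at the top of this
	 * file, e.g. ARRAY_MODE(x) is x << GB_TILE_MODE0__ARRAY_MODE__SHIFT,
	 * so each table entry is just the OR of its fields moved into
	 * position; indices that stay zero (e.g. 7) are skipped when
	 * the tables are written out below.
	 */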
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
2133 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2134 PIPE_CONFIG(ADDR_SURF_P2) |
2135 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2136 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2137 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2138 PIPE_CONFIG(ADDR_SURF_P2) |
2139 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2140 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2141 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2142 PIPE_CONFIG(ADDR_SURF_P2) |
2143 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2144 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2145 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2146 PIPE_CONFIG(ADDR_SURF_P2) |
2147 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2148 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2149 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2150 PIPE_CONFIG(ADDR_SURF_P2) |
2151 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2152 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2153 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2154 PIPE_CONFIG(ADDR_SURF_P2) |
2155 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2156 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2157 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2158 PIPE_CONFIG(ADDR_SURF_P2) |
2159 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2160 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2161 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2162 PIPE_CONFIG(ADDR_SURF_P2));
2163 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2164 PIPE_CONFIG(ADDR_SURF_P2) |
2165 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2166 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2167 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2168 PIPE_CONFIG(ADDR_SURF_P2) |
2169 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2170 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2171 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2172 PIPE_CONFIG(ADDR_SURF_P2) |
2173 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2174 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2175 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2176 PIPE_CONFIG(ADDR_SURF_P2) |
2177 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2178 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2179 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2180 PIPE_CONFIG(ADDR_SURF_P2) |
2181 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2182 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2183 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2184 PIPE_CONFIG(ADDR_SURF_P2) |
2185 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2186 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2187 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2188 PIPE_CONFIG(ADDR_SURF_P2) |
2189 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2190 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2191 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2192 PIPE_CONFIG(ADDR_SURF_P2) |
2193 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2194 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2195 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2196 PIPE_CONFIG(ADDR_SURF_P2) |
2197 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2198 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2199 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2200 PIPE_CONFIG(ADDR_SURF_P2) |
2201 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2203 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2204 PIPE_CONFIG(ADDR_SURF_P2) |
2205 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2207 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2208 PIPE_CONFIG(ADDR_SURF_P2) |
2209 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2210 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2211 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2212 PIPE_CONFIG(ADDR_SURF_P2) |
2213 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2215 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2216 PIPE_CONFIG(ADDR_SURF_P2) |
2217 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2219 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2220 PIPE_CONFIG(ADDR_SURF_P2) |
2221 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2222 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2223 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2224 PIPE_CONFIG(ADDR_SURF_P2) |
2225 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2226 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2227 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2228 PIPE_CONFIG(ADDR_SURF_P2) |
2229 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2231 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2232 PIPE_CONFIG(ADDR_SURF_P2) |
2233 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2234 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2236 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2237 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2238 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2239 NUM_BANKS(ADDR_SURF_8_BANK));
2240 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2241 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2242 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2243 NUM_BANKS(ADDR_SURF_8_BANK));
2244 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2245 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2246 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2247 NUM_BANKS(ADDR_SURF_8_BANK));
2248 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2249 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2250 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2251 NUM_BANKS(ADDR_SURF_8_BANK));
2252 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2253 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2254 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2255 NUM_BANKS(ADDR_SURF_8_BANK));
2256 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2257 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2258 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2259 NUM_BANKS(ADDR_SURF_8_BANK));
2260 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2263 NUM_BANKS(ADDR_SURF_8_BANK));
2264 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2265 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2266 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2267 NUM_BANKS(ADDR_SURF_16_BANK));
2268 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2269 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2270 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2271 NUM_BANKS(ADDR_SURF_16_BANK));
2272 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2275 NUM_BANKS(ADDR_SURF_16_BANK));
2276 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2277 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2278 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2279 NUM_BANKS(ADDR_SURF_16_BANK));
2280 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2281 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2282 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2283 NUM_BANKS(ADDR_SURF_16_BANK));
2284 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2287 NUM_BANKS(ADDR_SURF_16_BANK));
2288 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2289 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2290 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2291 NUM_BANKS(ADDR_SURF_8_BANK));
2293 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		if (reg_offset != 7 && reg_offset != 12 &&
		    reg_offset != 17 && reg_offset != 23)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2298 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2299 if (reg_offset != 7)
			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
		break;
	case CHIP_FIJI:
2305 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2309 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2310 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2313 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2317 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2321 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2325 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2326 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2328 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2329 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2330 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2332 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2333 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2334 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2335 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2336 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2337 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2338 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2339 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2340 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2343 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2344 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2347 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2348 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2351 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2352 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2353 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2355 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2356 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2364 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2368 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2370 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2371 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2372 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2373 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2374 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2375 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2376 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2379 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2380 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2382 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2383 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2384 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2386 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2387 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2388 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2390 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2391 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2392 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2394 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2395 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2396 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2397 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2398 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2399 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2400 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2402 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2403 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2404 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2406 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2407 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2408 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2410 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2411 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2412 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2416 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2418 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2419 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2420 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2423 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2425 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2428 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2430 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2431 NUM_BANKS(ADDR_SURF_8_BANK));
2432 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2433 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2434 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2435 NUM_BANKS(ADDR_SURF_8_BANK));
2436 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2438 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2439 NUM_BANKS(ADDR_SURF_8_BANK));
2440 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443 NUM_BANKS(ADDR_SURF_8_BANK));
2444 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447 NUM_BANKS(ADDR_SURF_8_BANK));
2448 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2451 NUM_BANKS(ADDR_SURF_8_BANK));
2452 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455 NUM_BANKS(ADDR_SURF_8_BANK));
2456 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2459 NUM_BANKS(ADDR_SURF_8_BANK));
2460 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2463 NUM_BANKS(ADDR_SURF_8_BANK));
2464 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467 NUM_BANKS(ADDR_SURF_8_BANK));
2468 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471 NUM_BANKS(ADDR_SURF_8_BANK));
2472 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2474 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2475 NUM_BANKS(ADDR_SURF_8_BANK));
2476 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479 NUM_BANKS(ADDR_SURF_8_BANK));
2480 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483 NUM_BANKS(ADDR_SURF_4_BANK));
2485 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2486 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2488 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2489 if (reg_offset != 7)
			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
		break;
	case CHIP_TONGA:
2494 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2496 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2497 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2498 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2500 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2501 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2502 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2503 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2504 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2505 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2506 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2507 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2508 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2509 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2510 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2511 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2513 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2514 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2515 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2517 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2518 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2519 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2521 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2522 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2523 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2524 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2525 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2526 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2527 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2528 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2529 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2531 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2532 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2536 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2537 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2540 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2542 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2544 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2545 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2553 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2555 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2556 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2557 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2559 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2560 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2561 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2562 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2563 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2564 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2565 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2567 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2568 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2569 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2570 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2571 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2572 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2573 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2575 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2576 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2577 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2579 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2580 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2581 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2583 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2584 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2585 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2586 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2587 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2588 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2589 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2591 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2592 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2593 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2594 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2595 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2596 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2597 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2599 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2600 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2601 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2602 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2603 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2604 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2607 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2608 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2609 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2611 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2612 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2613 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2614 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2615 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2617 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2618 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2619 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2620 NUM_BANKS(ADDR_SURF_16_BANK));
2621 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2622 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2623 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2624 NUM_BANKS(ADDR_SURF_16_BANK));
2625 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2626 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2627 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2628 NUM_BANKS(ADDR_SURF_16_BANK));
2629 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2631 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2632 NUM_BANKS(ADDR_SURF_16_BANK));
2633 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2635 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2636 NUM_BANKS(ADDR_SURF_16_BANK));
2637 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2639 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2640 NUM_BANKS(ADDR_SURF_16_BANK));
2641 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2642 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2643 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2644 NUM_BANKS(ADDR_SURF_16_BANK));
2645 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2647 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2648 NUM_BANKS(ADDR_SURF_16_BANK));
2649 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2651 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2652 NUM_BANKS(ADDR_SURF_16_BANK));
2653 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2654 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2655 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2656 NUM_BANKS(ADDR_SURF_16_BANK));
2657 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2658 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2659 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2660 NUM_BANKS(ADDR_SURF_16_BANK));
2661 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2662 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2663 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2664 NUM_BANKS(ADDR_SURF_8_BANK));
2665 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2666 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2667 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2668 NUM_BANKS(ADDR_SURF_4_BANK));
2669 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2671 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2672 NUM_BANKS(ADDR_SURF_4_BANK));
2674 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2675 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2677 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2678 if (reg_offset != 7)
			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
		break;
2682 case CHIP_POLARIS11:
2683 case CHIP_POLARIS12:
2684 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2686 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2687 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2688 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2690 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2691 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2692 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2695 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2696 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2697 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2699 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2700 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2703 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2704 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2707 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2708 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2711 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2712 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2714 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2715 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2716 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2717 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2718 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2719 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2721 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2723 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2725 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2730 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2731 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2733 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2734 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2735 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2737 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2738 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2739 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2741 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2743 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2744 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2745 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2747 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2748 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2749 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2750 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2751 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2753 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2754 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2755 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2756 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2757 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2758 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2759 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2760 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2761 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2762 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2763 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2765 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2766 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2767 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2768 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2769 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2770 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2771 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2773 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2774 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2775 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2777 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2778 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2779 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2780 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2781 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2782 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2783 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2785 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2786 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2787 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2789 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2790 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2791 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2792 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2793 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2794 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2795 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2796 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2797 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2798 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2799 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2801 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2802 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2803 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2807 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2809 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2810 NUM_BANKS(ADDR_SURF_16_BANK));
2812 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2814 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2815 NUM_BANKS(ADDR_SURF_16_BANK));
2817 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820 NUM_BANKS(ADDR_SURF_16_BANK));
2822 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2824 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2825 NUM_BANKS(ADDR_SURF_16_BANK));
2827 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2828 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2829 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2830 NUM_BANKS(ADDR_SURF_16_BANK));
2832 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2834 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2835 NUM_BANKS(ADDR_SURF_16_BANK));
2837 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840 NUM_BANKS(ADDR_SURF_16_BANK));
2842 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2843 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2844 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2845 NUM_BANKS(ADDR_SURF_16_BANK));
2847 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2848 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2849 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2850 NUM_BANKS(ADDR_SURF_16_BANK));
2852 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2854 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2855 NUM_BANKS(ADDR_SURF_16_BANK));
2857 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860 NUM_BANKS(ADDR_SURF_16_BANK));
2862 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2863 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2864 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2865 NUM_BANKS(ADDR_SURF_16_BANK));
2867 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2868 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2869 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2870 NUM_BANKS(ADDR_SURF_8_BANK));
2872 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2874 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2875 NUM_BANKS(ADDR_SURF_4_BANK));
2877 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2878 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2880 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2881 if (reg_offset != 7)
			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
		break;
2885 case CHIP_POLARIS10:
2886 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2887 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2888 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2889 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2890 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2891 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2892 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2893 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2894 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2895 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2896 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2897 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2898 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2900 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2901 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2902 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2903 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2904 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_STONEY:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	default:
		dev_warn(adev->dev,
3260 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3265 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3266 PIPE_CONFIG(ADDR_SURF_P2) |
3267 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3268 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3269 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3270 PIPE_CONFIG(ADDR_SURF_P2) |
3271 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3272 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3273 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3274 PIPE_CONFIG(ADDR_SURF_P2) |
3275 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3276 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3277 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3278 PIPE_CONFIG(ADDR_SURF_P2) |
3279 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3280 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3281 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3282 PIPE_CONFIG(ADDR_SURF_P2) |
3283 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3284 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3285 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3286 PIPE_CONFIG(ADDR_SURF_P2) |
3287 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3288 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3289 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3290 PIPE_CONFIG(ADDR_SURF_P2) |
3291 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3292 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3293 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3294 PIPE_CONFIG(ADDR_SURF_P2));
3295 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3296 PIPE_CONFIG(ADDR_SURF_P2) |
3297 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3299 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3300 PIPE_CONFIG(ADDR_SURF_P2) |
3301 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3303 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3304 PIPE_CONFIG(ADDR_SURF_P2) |
3305 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3307 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3308 PIPE_CONFIG(ADDR_SURF_P2) |
3309 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3311 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3312 PIPE_CONFIG(ADDR_SURF_P2) |
3313 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3315 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3316 PIPE_CONFIG(ADDR_SURF_P2) |
3317 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3320 PIPE_CONFIG(ADDR_SURF_P2) |
3321 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3323 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3324 PIPE_CONFIG(ADDR_SURF_P2) |
3325 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3326 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3327 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3328 PIPE_CONFIG(ADDR_SURF_P2) |
3329 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3330 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3331 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3332 PIPE_CONFIG(ADDR_SURF_P2) |
3333 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3335 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3336 PIPE_CONFIG(ADDR_SURF_P2) |
3337 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3338 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3339 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3340 PIPE_CONFIG(ADDR_SURF_P2) |
3341 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3343 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3344 PIPE_CONFIG(ADDR_SURF_P2) |
3345 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3348 PIPE_CONFIG(ADDR_SURF_P2) |
3349 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3351 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3352 PIPE_CONFIG(ADDR_SURF_P2) |
3353 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3355 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3356 PIPE_CONFIG(ADDR_SURF_P2) |
3357 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3359 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3360 PIPE_CONFIG(ADDR_SURF_P2) |
3361 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3363 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3364 PIPE_CONFIG(ADDR_SURF_P2) |
3365 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3368 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3371 NUM_BANKS(ADDR_SURF_8_BANK));
3372 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3373 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3374 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3375 NUM_BANKS(ADDR_SURF_8_BANK));
3376 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3379 NUM_BANKS(ADDR_SURF_8_BANK));
3380 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3381 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3382 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3383 NUM_BANKS(ADDR_SURF_8_BANK));
3384 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3387 NUM_BANKS(ADDR_SURF_8_BANK));
3388 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3390 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3391 NUM_BANKS(ADDR_SURF_8_BANK));
3392 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3394 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3395 NUM_BANKS(ADDR_SURF_8_BANK));
3396 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3397 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3398 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3399 NUM_BANKS(ADDR_SURF_16_BANK));
3400 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3401 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3402 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3403 NUM_BANKS(ADDR_SURF_16_BANK));
3404 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3407 NUM_BANKS(ADDR_SURF_16_BANK));
3408 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3409 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3410 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3411 NUM_BANKS(ADDR_SURF_16_BANK));
3412 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3413 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3414 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3415 NUM_BANKS(ADDR_SURF_16_BANK));
3416 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3419 NUM_BANKS(ADDR_SURF_16_BANK));
3420 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3423 NUM_BANKS(ADDR_SURF_8_BANK));
3425 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3426 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3428 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3430 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3431 if (reg_offset != 7)
3432 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
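
/*
 * Steer subsequent register accesses to a specific shader engine (SE),
 * shader array (SH) and instance via GRBM_GFX_INDEX; passing 0xffffffff
 * for a field selects broadcast to all units at that level.
 */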
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
				  u32 se_num, u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, data);
}
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q, u32 vm)
{
	vi_srbm_select(adev, me, pipe, q, vm);
}
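
/*
 * Return a bitmap of the render backends that are active on the currently
 * selected SE/SH, i.e. not disabled either by fuses (CC_RB_BACKEND_DISABLE)
 * or by the user (GC_USER_RB_BACKEND_DISABLE).
 */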
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_RB_BACKEND_DISABLE) |
		RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_VEGAM:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
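
/*
 * Program per-SE PA_SC_RASTER_CONFIG/_1 on parts where some render
 * backends were harvested, remapping the RB/PKR/SE map fields so that
 * only active backends are referenced.
 */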
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
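
/*
 * Read back which RBs are active on every SE/SH, cache the result (both
 * for the driver and for userspace queries), and program the raster
 * configuration, falling back to the harvested path when some RBs are
 * disabled.
 */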
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
	    adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMID SH_MEM registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	 * access. These should be enabled by FW for target VMIDs.
	 */
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
	}
}
static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
{
	int vmid;

	/*
	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
	 * the driver can enable them for graphics. VMID0 should maintain
	 * access so that HWS firmware can save/restore entries.
	 */
	for (vmid = 1; vmid < 16; vmid++) {
		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
	}
}
static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	default:
		adev->gfx.config.double_offchip_lds_buf = 1;
		break;
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		adev->gfx.config.double_offchip_lds_buf = 0;
		break;
	}
}
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
					  SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);
	gfx_v8_0_init_gds_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcast
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);
}
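
/*
 * Poll the RLC serdes busy registers, first the per-CU masters on every
 * SE/SH and then the non-CU masters, until they report idle or
 * adev->usec_timeout expires.
 */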
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
	       adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
	       adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
	       adev->gfx.rlc.clear_state_size);
}
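
/*
 * Walk the RLC indirect register list, recording where each entry starts
 * and replacing every register index with an offset into the table of
 * unique indices that is built up on the fly.
 */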
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
					int ind_offset,
					int list_size,
					int *unique_indices,
					int *indices_count,
					int max_indices,
					int *ind_start_offsets,
					int *offset_count,
					int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
		     indices < *indices_count;
		     indices++) {
			if (unique_indices[indices] ==
			    register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}
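
/*
 * Set up the RLC save/restore machinery: copy the firmware's register-list
 * format, deduplicate its indirect indices, then program the ARAM restore
 * list, the GPM scratch copies, the starting offsets and the unique-index
 * control registers.
 */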
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	u32 temp, data;
	int i;

	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;
	int list_size;

	unsigned int *register_list_format =
		kmemdup(adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				    RLC_FormatDirectRegListLength,
				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				    unique_indices,
				    &indices_count,
				    ARRAY_SIZE(unique_indices),
				    indirect_start_offsets,
				    &offset_count,
				    ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
	       adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
		       indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	u32 data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12) ||
		   (adev->asic_type == CHIP_VEGAM)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}
}
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo enables the cp interrupt after cp init */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev)) {
		gfx_v8_0_init_csb(adev);
		return 0;
	}

	adev->gfx.rlc.funcs->stop(adev);
	adev->gfx.rlc.funcs->reset(adev);
	gfx_v8_0_init_pg(adev);
	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}
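
/*
 * Compute how many dwords the clear-state preamble will occupy: the
 * begin/end markers, context control, every SECT_CONTEXT extent from
 * vi_cs_data, and the raster config registers. Must stay in sync with
 * the packets emitted in gfx_v8_0_cp_gfx_start().
 */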
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
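
/*
 * Bring up the gfx CP: program the static CP registers, un-halt the
 * micro engines, then emit the clear-state preamble and CE partition
 * setup on gfx ring 0.
 */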
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
						  PACKET3(PACKET3_SET_CONTEXT_REG,
							  ext->reg_count));
				amdgpu_ring_write(ring,
						  ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;

	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			    DOORBELL_RANGE_LOWER,
			    adev->doorbell_index.gfx_ring0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
	       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
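
/*
 * Program the gfx ring buffer registers (CP_RB0_*): buffer size, read
 * and write pointers, writeback addresses and base address, then set up
 * the doorbell and start the ring.
 */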
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->sched.ready = true;

	return 0;
}
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		adev->gfx.kiq.ring.sched.ready = false;
	}
	udelay(50);
}
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
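
/*
 * Use the KIQ to map all enabled compute queues: build the queue mask
 * from mec.queue_bitmap, then emit one SET_RESOURCES packet followed by
 * a MAP_QUEUES packet per compute ring.
 */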
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	amdgpu_ring_commit(kiq_ring);

	return 0;
}
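
/*
 * Issue a dequeue request (req selects the dequeue mode) on the currently
 * selected HQD and poll until it reports inactive, then clear the queue
 * read/write pointers.
 */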
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
			mqd->cp_hqd_queue_priority =
				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
		}
	}
}
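
/*
 * Fill the vi_mqd for this ring: EOP buffer, MQD and ring base addresses,
 * doorbell control, rptr/wptr report addresses and priority. Only the KIQ
 * marks itself active here; compute queues are activated via MAP_QUEUES.
 */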
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			    (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MIN_IB_AVAIL_SIZE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* set static priority for a queue/ring */
	gfx_v8_0_mqd_set_priority(ring, mqd);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

	/* the map_queues packet doesn't need to activate the queue,
	 * so only the KIQ needs to set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}
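
/*
 * Write every field of the MQD to the corresponding HQD register for the
 * currently selected queue, activating it last; the EOP pointers are
 * skipped on Tonga (see the errata note below).
 */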
static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			       struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
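
/*
 * Initialize the KIQ queue: on normal bring-up the MQD is zeroed,
 * initialized and backed up; on GPU reset it is restored from the backup
 * and only re-committed to the hardware.
 */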
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
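/*
 * Initialize a user compute (KCQ) MQD. Note there is no mqd_commit here:
 * the actual HQD programming is done by the KIQ via the MAP_QUEUES packet
 * in gfx_v8_0_kiq_kcq_enable(), so this only prepares (or restores) the
 * MQD image in memory.
 */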
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
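/*
 * Tell the CP which doorbell offsets belong to MEC queues, from the KIQ
 * doorbell up to the last MEC ring. The "<< 2" converts a doorbell index
 * to a byte offset (each 32-bit doorbell occupies 4 bytes).
 */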
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (unlikely(r != 0))
		return r;

	gfx_v8_0_kiq_init_queue(ring);
	amdgpu_bo_kunmap(ring->mqd_obj);
	ring->mqd_ptr = NULL;
	amdgpu_bo_unreserve(ring->mqd_obj);
	ring->sched.ready = true;
	return 0;
}
static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

done:
	return r;
}
static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	/* collect all the ring_tests here, gfx, kiq, compute */
	ring = &adev->gfx.gfx_ring[0];
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	ring = &adev->gfx.kiq.ring;
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		amdgpu_ring_test_helper(ring);
	}

	return 0;
}
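/*
 * Bring-up order matters here: the KIQ must be resumed first, because
 * the compute queues (KCQs) are mapped through KIQ packets; the gfx ring
 * is resumed with direct register writes in between, and a ring test on
 * every ring verifies the whole CP is alive before GUI idle interrupts
 * are re-enabled.
 */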
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kcq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_test_all_rings(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}

static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_constants_init(adev);

	r = adev->gfx.rlc.funcs->resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}
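/*
 * Tear down all user compute queues through the KIQ. Each UNMAP_QUEUES
 * request is 6 dwords (PACKET3 header plus 5 payload dwords), hence the
 * ring_alloc of 6 * num_compute_rings. ACTION(1) requests RESET_QUEUES,
 * with the target queue selected per iteration by its doorbell offset.
 */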
static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;

	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
	if (r)
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
				  PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
				  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
				  PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
				  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
	r = amdgpu_ring_test_helper(kiq_ring);
	if (r)
		DRM_ERROR("KCQ disable failed\n");

	return r;
}
static bool gfx_v8_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
		|| RREG32(mmGRBM_STATUS2) != 0x8)
		return false;
	else
		return true;
}

static bool gfx_v8_0_rlc_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (RREG32(mmGRBM_STATUS2) != 0x8)
		return false;
	else
		return true;
}

static int gfx_v8_0_wait_for_rlc_idle(void *handle)
{
	unsigned int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_rlc_is_idle(handle))
			return 0;

		udelay(1);
	}
	return -ETIMEDOUT;
}

static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(handle))
			return 0;

		udelay(1);
	}
	return -ETIMEDOUT;
}
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ so the CPC stops touching memory that is no longer valid */
	gfx_v8_0_kcq_disable(adev);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	amdgpu_gfx_rlc_enter_safe_mode(adev);
	if (!gfx_v8_0_wait_for_idle(adev))
		gfx_v8_0_cp_enable(adev, false);
	else
		pr_err("cp is busy, skip halt cp\n");
	if (!gfx_v8_0_wait_for_rlc_idle(adev))
		adev->gfx.rlc.funcs->stop(adev);
	else
		pr_err("rlc is busy, skip halt rlc\n");
	amdgpu_gfx_rlc_exit_safe_mode(adev);

	return 0;
}

static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}

static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
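/*
 * Inspect GRBM/SRBM status and work out which soft-reset bits would be
 * needed to unwedge the hardware. The result is cached in adev->gfx so
 * that the pre/post soft-reset callbacks below can act on the same
 * snapshot.
 */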
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	/* stop the rlc */
	adev->gfx.rlc.funcs->stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
		gfx_v8_0_kcq_resume(adev);
	}

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	gfx_v8_0_cp_test_all_rings(adev);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
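/*
 * Program the per-VMID GDS partition registers from within the ring.
 * GDS/GWS sizes and bases are plain register values; the OA (ordered
 * append) allocation is a contiguous bitmask, computed as
 * (1 << (oa_size + oa_base)) - (1 << oa_base), i.e. oa_size bits set
 * starting at bit oa_base.
 */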
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}
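/*
 * Same indexed SQ access as wave_read_ind(), but with AUTO_INCR set so
 * that consecutive RREG32(mmSQ_IND_DATA) reads walk a register range
 * (used below for bulk SGPR dumps).
 */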
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};

static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
					  AMDGPU_MAX_COMPUTE_RINGS);
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	return 0;
}
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12) ||
	    (adev->asic_type == CHIP_VEGAM))
		/* Send msg to SMU via Powerplay */
		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (enable)
		RREG32(mmDB_RENDER_CONTROL);
}

static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_exit_safe_mode(adev);
	return 0;
}
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
#define MSG_ENTER_RLC_SAFE_MODE 1
#define MSG_EXIT_RLC_SAFE_MODE 0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
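/*
 * RLC safe-mode handshake: the driver writes CMD plus a MESSAGE into
 * mmRLC_SAFE_MODE and then polls until the RLC firmware clears the CMD
 * bit (and, on entry, until GFX clock/power status report on). The
 * clock- and power-gating reconfiguration elsewhere in this file runs
 * inside this safe mode.
 */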
static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
{
	uint32_t rlc_setting;

	rlc_setting = RREG32(mmRLC_CNTL);
	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return false;

	return true;
}

static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPM_STAT) &
		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
			break;
		udelay(1);
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	WREG32(mmRLC_SAFE_MODE, data);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
{
	u32 data;

	if (amdgpu_sriov_is_pp_one_vf(adev))
		data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
	else
		data = RREG32(mmRLC_SPM_VMID);

	data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
	data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;

	if (amdgpu_sriov_is_pp_one_vf(adev))
		WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
	else
		WREG32(mmRLC_SPM_VMID, data);
}

static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
	.set_safe_mode = gfx_v8_0_set_safe_mode,
	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
	.init = gfx_v8_0_rlc_init,
	.get_csb_size = gfx_v8_0_get_csb_size,
	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
	.resume = gfx_v8_0_rlc_resume,
	.stop = gfx_v8_0_rlc_stop,
	.reset = gfx_v8_0_rlc_reset,
	.start = gfx_v8_0_rlc_start,
	.update_spm_vmid = gfx_v8_0_update_spm_vmid
};
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * ===  MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * ===  CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
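/*
 * On Tonga (and Polaris below) clock gating is not driven by direct
 * register writes but by messages to the SMU: PP_CG_MSG_ID() packs the
 * gfx block (CG/MG/...), which states the block supports
 * (pp_support_state) and the requested state (pp_state) into one
 * message id for amdgpu_dpm_set_clockgating_by_smu().
 */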
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CP,
				pp_support_state,
				pp_state);
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}
static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}

static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		/* XXX check if swapping is necessary on BE */
		return ring->adev->wb.wb[ring->wptr_offs];
	else
		return RREG32(mmCP_RB0_WPTR);
}

static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_job *job,
				      struct amdgpu_ib *ib,
				      uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
	amdgpu_ring_write(ring, upper_32_bits(seq - 1));

	/* Then send the real EOP event down the pipe:
	 * EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
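/*
 * COND_EXEC patching: emit_init_cond_exec() writes a COND_EXEC packet
 * whose dword count is a 0x55aa55aa placeholder and returns its ring
 * offset; once the conditional span has been emitted,
 * emit_patch_cond_exec() rewrites that slot with the real distance from
 * the placeholder to the current wptr, wrapping around the ring size if
 * needed.
 */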
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t reg_val_offs)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned int type,
					 enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 0;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 1;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);

	return 0;
}

static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
		     enable_flag);

	return 0;
}
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
static void gfx_v8_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
{
	u32 enc, se_id, sh_id, cu_id;
	char type[20];
	int sq_edc_source = -1;

	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);

	switch (enc) {
	case 0:
		DRM_INFO("SQ general purpose intr detected: "
			 "se_id %d, immed_overflow %d, host_reg_overflow %d, "
			 "host_cmd_overflow %d, cmd_timestamp %d, "
			 "reg_timestamp %d, thread_trace_buff_full %d, "
			 "wlt %d, thread_trace %d.\n",
			 se_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE));
		break;
	case 1:
	case 2:
		cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
		sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);

		/*
		 * This function can be called either directly from ISR
		 * or from BH in which case we can access SQ_EDC_INFO
		 * instance.
		 */
		if (in_task()) {
			mutex_lock(&adev->grbm_idx_mutex);
			gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);

			sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);

			gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
			mutex_unlock(&adev->grbm_idx_mutex);
		}

		if (enc == 1)
			sprintf(type, "instruction intr");
		else
			sprintf(type, "EDC/ECC error");

		DRM_INFO("SQ %s detected: "
			 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d, "
			 "trap %s, sq_edc_info.source %s.\n",
			 type, se_id, sh_id, cu_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
			 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable");
		break;
	default:
		DRM_ERROR("SQ invalid encoding type.\n");
	}
}
static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
	struct sq_work *sq_work = container_of(work, struct sq_work, work);

	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
}
static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
			   struct amdgpu_irq_src *source,
			   struct amdgpu_iv_entry *entry)
{
	unsigned ih_data = entry->src_data[0];

	/*
	 * Try to submit work so SQ_EDC_INFO can be accessed from
	 * BH. If previous work submission hasn't finished yet
	 * just print whatever info is possible directly from the ISR.
	 */
	if (work_pending(&adev->gfx.sq_work.work)) {
		gfx_v8_0_parse_sq_irq(adev, ih_data);
	} else {
		adev->gfx.sq_work.ih_data = ih_data;
		schedule_work(&adev->gfx.sq_work.work);
	}

	return 0;
}
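
/*
 * Emit a SURFACE_SYNC packet on the gfx ring to flush/invalidate the
 * texture L1/L2 caches and the shader instruction/constant caches over
 * the full address range (CP_COHER_SIZE = 0xffffffff, CP_COHER_BASE = 0).
 */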
static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA |
			  PACKET3_TC_WB_ACTION_ENA);	/* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
}
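
/*
 * Compute rings use ACQUIRE_MEM instead of SURFACE_SYNC for the same
 * cache actions; the packet additionally carries the high halves of
 * CP_COHER_SIZE and CP_COHER_BASE.
 */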
static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA |
			  PACKET3_TC_WB_ACTION_ENA);	/* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xff);		/* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
}
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		12 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
			the first COND_EXEC jump to the place just
			prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		12 + 12 + /* FENCE x2 */
		2 + /* SWITCH_BUFFER */
		5, /* SURFACE_SYNC */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
		7, /* gfx_v8_0_emit_mem_sync_compute */
	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}
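
/*
 * Apply a user-requested CU disable mask for the currently selected
 * SE/SH by programming it into GC_USER_SHADER_ARRAY_CONFIG.INACTIVE_CUS.
 */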
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
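
/*
 * Walk every SE/SH, record the active-CU bitmap, and build the
 * "always on" (AO) CU mask: up to ao_cu_num CUs per SH (only 2 on APUs)
 * are marked AO, packed one byte per SH at bit offset (se * 16 + sh * 8).
 */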
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
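
/*
 * Like the CE metadata above, the DE payload is staged in the per-ring
 * CSA (context save area); it additionally records a GDS backup address
 * one page past the CSA base, presumably so GDS state can be restored
 * when the context is resumed.
 */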
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}