/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c — GFX v8 (VI family) block of the amdgpu driver. */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include <drm/drmP.h>
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
/* GFX8 exposes a single graphics ring. */
#define GFX8_NUM_GFX_RINGS     1
/* Bytes reserved per MEC pipe for HPD data — NOTE(review): confirm against MEC spec. */
#define GFX8_MEC_HPD_SIZE 2048

/* Per-ASIC golden GB_ADDR_CONFIG values. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/*
 * Helpers that place a field value at its bit position within the
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 registers, using the register
 * headers' *__SHIFT constants.
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-feature override bit masks for the RLC_CGTT_MGCG_OVERRIDE register. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set (1) or clear (0) a BPM register via serdes. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

/* Entry count of the RLC "direct register list" format — confirm vs. RLC FW spec. */
#define RLC_FormatDirectRegListLength        14
93
/*
 * Firmware images consumed by this IP block, one set per supported VI ASIC
 * (CE/PFP/ME/MEC[/MEC2]/RLC).  MODULE_FIRMWARE() records each name in module
 * metadata so userspace tooling can bundle the files.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

/* Polaris parts additionally ship "_2" variants of several images. */
MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
162
/*
 * Per-VMID GDS register offsets, indexed by VMID 0-15.  Each entry holds the
 * {VMIDn_BASE, VMIDn_SIZE, GWS_VMIDn, OA_VMIDn} register offsets used to
 * program that VMID's GDS/GWS/OA allocation.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
182
/*
 * Tonga A11 golden register settings.  Flat list of {reg, AND-mask, OR-value}
 * triples — presumably applied read-modify-write by the golden-register
 * programming helper at init; confirm against the caller.  Values are
 * hardware-validated magic numbers: do not edit without the register spec.
 */
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
202
/*
 * Tonga common golden settings ({reg, mask, value} triples): broadcast
 * GRBM index, raster config, GB_ADDR_CONFIG and SPI CU reservations.
 */
static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
214
/*
 * Tonga MGCG/CGCG (medium-grain / coarse-grain clock gating) init sequence:
 * {reg, mask, value} triples.  Programs the CGTT per-block clock controls,
 * then per-CU CGTS settings for CU0-CU7, then the global CGTS/RLC gating
 * registers.  mmGRBM_GFX_INDEX 0xe0000000 selects broadcast mode before each
 * group of writes.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* per-CU CGTS settings, CU0-CU7 */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
293
/*
 * Polaris11 A11 golden register settings ({reg, mask, value} triples).
 * Hardware-validated values; do not edit without the register spec.
 */
static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
314
/* Polaris11 common golden settings ({reg, mask, value} triples). */
static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
324
/*
 * Polaris10 A11 golden register settings ({reg, mask, value} triples).
 * Hardware-validated values; do not edit without the register spec.
 */
static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
345
/* Polaris10 common golden settings ({reg, mask, value} triples). */
static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
357
/* Fiji common golden settings ({reg, mask, value} triples). */
static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
371
/* Fiji A10 golden register settings ({reg, mask, value} triples). */
static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
386
/*
 * Fiji MGCG/CGCG clock-gating init sequence ({reg, mask, value} triples).
 * Same structure as the Tonga table but without per-CU CGTS entries.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
425
/* Iceland (Topaz) A11 golden register settings ({reg, mask, value} triples). */
static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
445
/* Iceland (Topaz) common golden settings ({reg, mask, value} triples). */
static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
457
/*
 * Iceland MGCG/CGCG clock-gating init sequence ({reg, mask, value} triples).
 * Covers CU0-CU5 only (fewer CUs than Tonga/CZ) and, unlike those tables,
 * does not program CP_MEM_SLP_CNTL at the end.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* per-CU CGTS settings, CU0-CU5 */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
525
/* Carrizo A11 golden register settings ({reg, mask, value} triples). */
static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
541
/* Carrizo common golden settings ({reg, mask, value} triples). */
static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
553
/*
 * Carrizo MGCG/CGCG clock-gating init sequence ({reg, mask, value} triples):
 * CGTT per-block clock controls, per-CU CGTS settings for CU0-CU7, then the
 * global CGTS/RLC gating registers.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* per-CU CGTS settings, CU0-CU7 */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
632
/* Stoney "golden" register settings (hardware-team tuned values).
 * Triples of {register, AND mask, value} consumed by
 * amdgpu_program_register_sequence(). */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
646
/* Stoney common GFX config: raster/addressing setup and SPI resource
 * reservation, applied unconditionally at golden-register init.
 * Triples of {register, AND mask, value}. */
static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
658
/* Stoney medium-grain / coarse-grain clockgating init values.
 * Triples of {register, AND mask, value}. */
static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
667
668 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
669 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
670 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
671 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
672 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
673 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
674 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
675 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
676
/*
 * gfx_v8_0_init_golden_registers - apply per-ASIC "golden" register values.
 *
 * Programs the hardware-team supplied register sequences (clockgating init,
 * golden settings and common GFX config) for the detected ASIC.  The
 * sequence order within each case matters and must not be changed.
 * Unknown ASIC types are silently ignored.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_program_register_sequence(adev,
                                                 iceland_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_iceland_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_program_register_sequence(adev,
                                                 iceland_golden_common_all,
                                                 (const u32)ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_program_register_sequence(adev,
                                                 fiji_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_fiji_a10,
                                                 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_program_register_sequence(adev,
                                                 fiji_golden_common_all,
                                                 (const u32)ARRAY_SIZE(fiji_golden_common_all));
                break;

        case CHIP_TONGA:
                amdgpu_program_register_sequence(adev,
                                                 tonga_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_tonga_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_program_register_sequence(adev,
                                                 tonga_golden_common_all,
                                                 (const u32)ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_polaris11_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_program_register_sequence(adev,
                                                 polaris11_golden_common_all,
                                                 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_polaris10_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_program_register_sequence(adev,
                                                 polaris10_golden_common_all,
                                                 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
                /* Board-specific quirk: a few Polaris10 boards (matched by
                 * PCI revision + subsystem IDs) need two extra I2C register
                 * writes at init — presumably a VBIOS/regulator fixup;
                 * NOTE(review): exact purpose not visible here, confirm
                 * against the original commit before touching. */
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_program_register_sequence(adev,
                                                 cz_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_common_all,
                                                 (const u32)ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_program_register_sequence(adev,
                                                 stoney_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_common_all,
                                                 (const u32)ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}
765
766 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
767 {
768         adev->gfx.scratch.num_reg = 8;
769         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
770         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
771 }
772
773 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
774 {
775         struct amdgpu_device *adev = ring->adev;
776         uint32_t scratch;
777         uint32_t tmp = 0;
778         unsigned i;
779         int r;
780
781         r = amdgpu_gfx_scratch_get(adev, &scratch);
782         if (r) {
783                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
784                 return r;
785         }
786         WREG32(scratch, 0xCAFEDEAD);
787         r = amdgpu_ring_alloc(ring, 3);
788         if (r) {
789                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
790                           ring->idx, r);
791                 amdgpu_gfx_scratch_free(adev, scratch);
792                 return r;
793         }
794         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
795         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
796         amdgpu_ring_write(ring, 0xDEADBEEF);
797         amdgpu_ring_commit(ring);
798
799         for (i = 0; i < adev->usec_timeout; i++) {
800                 tmp = RREG32(scratch);
801                 if (tmp == 0xDEADBEEF)
802                         break;
803                 DRM_UDELAY(1);
804         }
805         if (i < adev->usec_timeout) {
806                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
807                          ring->idx, i);
808         } else {
809                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
810                           ring->idx, scratch, tmp);
811                 r = -EINVAL;
812         }
813         amdgpu_gfx_scratch_free(adev, scratch);
814         return r;
815 }
816
817 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
818 {
819         struct amdgpu_device *adev = ring->adev;
820         struct amdgpu_ib ib;
821         struct dma_fence *f = NULL;
822         uint32_t scratch;
823         uint32_t tmp = 0;
824         long r;
825
826         r = amdgpu_gfx_scratch_get(adev, &scratch);
827         if (r) {
828                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
829                 return r;
830         }
831         WREG32(scratch, 0xCAFEDEAD);
832         memset(&ib, 0, sizeof(ib));
833         r = amdgpu_ib_get(adev, NULL, 256, &ib);
834         if (r) {
835                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
836                 goto err1;
837         }
838         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
839         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
840         ib.ptr[2] = 0xDEADBEEF;
841         ib.length_dw = 3;
842
843         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
844         if (r)
845                 goto err2;
846
847         r = dma_fence_wait_timeout(f, false, timeout);
848         if (r == 0) {
849                 DRM_ERROR("amdgpu: IB test timed out.\n");
850                 r = -ETIMEDOUT;
851                 goto err2;
852         } else if (r < 0) {
853                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
854                 goto err2;
855         }
856         tmp = RREG32(scratch);
857         if (tmp == 0xDEADBEEF) {
858                 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
859                 r = 0;
860         } else {
861                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
862                           scratch, tmp);
863                 r = -EINVAL;
864         }
865 err2:
866         amdgpu_ib_free(adev, &ib, NULL);
867         dma_fence_put(f);
868 err1:
869         amdgpu_gfx_scratch_free(adev, scratch);
870         return r;
871 }
872
873
874 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
875 {
876         release_firmware(adev->gfx.pfp_fw);
877         adev->gfx.pfp_fw = NULL;
878         release_firmware(adev->gfx.me_fw);
879         adev->gfx.me_fw = NULL;
880         release_firmware(adev->gfx.ce_fw);
881         adev->gfx.ce_fw = NULL;
882         release_firmware(adev->gfx.rlc_fw);
883         adev->gfx.rlc_fw = NULL;
884         release_firmware(adev->gfx.mec_fw);
885         adev->gfx.mec_fw = NULL;
886         if ((adev->asic_type != CHIP_STONEY) &&
887             (adev->asic_type != CHIP_TOPAZ))
888                 release_firmware(adev->gfx.mec2_fw);
889         adev->gfx.mec2_fw = NULL;
890
891         kfree(adev->gfx.rlc.register_list_format);
892 }
893
894 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
895 {
896         const char *chip_name;
897         char fw_name[30];
898         int err;
899         struct amdgpu_firmware_info *info = NULL;
900         const struct common_firmware_header *header = NULL;
901         const struct gfx_firmware_header_v1_0 *cp_hdr;
902         const struct rlc_firmware_header_v2_0 *rlc_hdr;
903         unsigned int *tmp = NULL, i;
904
905         DRM_DEBUG("\n");
906
907         switch (adev->asic_type) {
908         case CHIP_TOPAZ:
909                 chip_name = "topaz";
910                 break;
911         case CHIP_TONGA:
912                 chip_name = "tonga";
913                 break;
914         case CHIP_CARRIZO:
915                 chip_name = "carrizo";
916                 break;
917         case CHIP_FIJI:
918                 chip_name = "fiji";
919                 break;
920         case CHIP_POLARIS11:
921                 chip_name = "polaris11";
922                 break;
923         case CHIP_POLARIS10:
924                 chip_name = "polaris10";
925                 break;
926         case CHIP_POLARIS12:
927                 chip_name = "polaris12";
928                 break;
929         case CHIP_STONEY:
930                 chip_name = "stoney";
931                 break;
932         default:
933                 BUG();
934         }
935
936         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
937                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
938                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
939                 if (err == -ENOENT) {
940                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
941                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
942                 }
943         } else {
944                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
945                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
946         }
947         if (err)
948                 goto out;
949         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
950         if (err)
951                 goto out;
952         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
953         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
954         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
955
956         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
957                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
958                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
959                 if (err == -ENOENT) {
960                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
961                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
962                 }
963         } else {
964                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
965                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
966         }
967         if (err)
968                 goto out;
969         err = amdgpu_ucode_validate(adev->gfx.me_fw);
970         if (err)
971                 goto out;
972         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
973         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
974
975         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
976
977         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
978                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
979                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
980                 if (err == -ENOENT) {
981                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
982                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
983                 }
984         } else {
985                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
986                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
987         }
988         if (err)
989                 goto out;
990         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
991         if (err)
992                 goto out;
993         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
994         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
995         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
996
997         /*
998          * Support for MCBP/Virtualization in combination with chained IBs is
999          * formal released on feature version #46
1000          */
1001         if (adev->gfx.ce_feature_version >= 46 &&
1002             adev->gfx.pfp_feature_version >= 46) {
1003                 adev->virt.chained_ib_support = true;
1004                 DRM_INFO("Chained IB support enabled!\n");
1005         } else
1006                 adev->virt.chained_ib_support = false;
1007
1008         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1009         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1010         if (err)
1011                 goto out;
1012         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1013         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1014         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1015         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1016
1017         adev->gfx.rlc.save_and_restore_offset =
1018                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1019         adev->gfx.rlc.clear_state_descriptor_offset =
1020                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1021         adev->gfx.rlc.avail_scratch_ram_locations =
1022                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1023         adev->gfx.rlc.reg_restore_list_size =
1024                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1025         adev->gfx.rlc.reg_list_format_start =
1026                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1027         adev->gfx.rlc.reg_list_format_separate_start =
1028                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1029         adev->gfx.rlc.starting_offsets_start =
1030                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1031         adev->gfx.rlc.reg_list_format_size_bytes =
1032                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1033         adev->gfx.rlc.reg_list_size_bytes =
1034                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1035
1036         adev->gfx.rlc.register_list_format =
1037                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1038                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1039
1040         if (!adev->gfx.rlc.register_list_format) {
1041                 err = -ENOMEM;
1042                 goto out;
1043         }
1044
1045         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1046                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1047         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1048                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1049
1050         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1051
1052         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1053                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1054         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1055                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1056
1057         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1058                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1059                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1060                 if (err == -ENOENT) {
1061                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1062                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1063                 }
1064         } else {
1065                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1066                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1067         }
1068         if (err)
1069                 goto out;
1070         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1071         if (err)
1072                 goto out;
1073         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1074         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1075         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1076
1077         if ((adev->asic_type != CHIP_STONEY) &&
1078             (adev->asic_type != CHIP_TOPAZ)) {
1079                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1080                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1081                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1082                         if (err == -ENOENT) {
1083                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1084                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1085                         }
1086                 } else {
1087                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1088                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1089                 }
1090                 if (!err) {
1091                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1092                         if (err)
1093                                 goto out;
1094                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1095                                 adev->gfx.mec2_fw->data;
1096                         adev->gfx.mec2_fw_version =
1097                                 le32_to_cpu(cp_hdr->header.ucode_version);
1098                         adev->gfx.mec2_feature_version =
1099                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1100                 } else {
1101                         err = 0;
1102                         adev->gfx.mec2_fw = NULL;
1103                 }
1104         }
1105
1106         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1107                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1108                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1109                 info->fw = adev->gfx.pfp_fw;
1110                 header = (const struct common_firmware_header *)info->fw->data;
1111                 adev->firmware.fw_size +=
1112                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1113
1114                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1115                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1116                 info->fw = adev->gfx.me_fw;
1117                 header = (const struct common_firmware_header *)info->fw->data;
1118                 adev->firmware.fw_size +=
1119                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1120
1121                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1122                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1123                 info->fw = adev->gfx.ce_fw;
1124                 header = (const struct common_firmware_header *)info->fw->data;
1125                 adev->firmware.fw_size +=
1126                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1127
1128                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1129                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1130                 info->fw = adev->gfx.rlc_fw;
1131                 header = (const struct common_firmware_header *)info->fw->data;
1132                 adev->firmware.fw_size +=
1133                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1134
1135                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1136                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1137                 info->fw = adev->gfx.mec_fw;
1138                 header = (const struct common_firmware_header *)info->fw->data;
1139                 adev->firmware.fw_size +=
1140                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1141
1142                 /* we need account JT in */
1143                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1144                 adev->firmware.fw_size +=
1145                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1146
1147                 if (amdgpu_sriov_vf(adev)) {
1148                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1149                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1150                         info->fw = adev->gfx.mec_fw;
1151                         adev->firmware.fw_size +=
1152                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1153                 }
1154
1155                 if (adev->gfx.mec2_fw) {
1156                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1157                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1158                         info->fw = adev->gfx.mec2_fw;
1159                         header = (const struct common_firmware_header *)info->fw->data;
1160                         adev->firmware.fw_size +=
1161                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1162                 }
1163
1164         }
1165
1166 out:
1167         if (err) {
1168                 dev_err(adev->dev,
1169                         "gfx8: Failed to load firmware \"%s\"\n",
1170                         fw_name);
1171                 release_firmware(adev->gfx.pfp_fw);
1172                 adev->gfx.pfp_fw = NULL;
1173                 release_firmware(adev->gfx.me_fw);
1174                 adev->gfx.me_fw = NULL;
1175                 release_firmware(adev->gfx.ce_fw);
1176                 adev->gfx.ce_fw = NULL;
1177                 release_firmware(adev->gfx.rlc_fw);
1178                 adev->gfx.rlc_fw = NULL;
1179                 release_firmware(adev->gfx.mec_fw);
1180                 adev->gfx.mec_fw = NULL;
1181                 release_firmware(adev->gfx.mec2_fw);
1182                 adev->gfx.mec2_fw = NULL;
1183         }
1184         return err;
1185 }
1186
/*
 * gfx_v8_0_get_csb_buffer - build the RLC clear-state PM4 stream.
 *
 * Fills @buffer (little-endian dwords) with the clear-state indirect
 * buffer: preamble begin, context control, all SECT_CONTEXT register
 * extents from adev->gfx.rlc.cs_data, the raster config pair, preamble
 * end, and a final CLEAR_STATE packet.  Packet order is part of the
 * hardware contract and must not be changed.  No-op if either the
 * cs_data table or @buffer is missing.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
                                    volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        /* emit one SET_CONTEXT_REG packet per context-section extent */
        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                /* non-context sections abort the stream */
                                return;
                        }
                }
        }

        /* raster config for SE0/SH0 (uniform across the chip) */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
                        PACKET3_SET_CONTEXT_REG_START);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}
1233
1234 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1235 {
1236         const __le32 *fw_data;
1237         volatile u32 *dst_ptr;
1238         int me, i, max_me = 4;
1239         u32 bo_offset = 0;
1240         u32 table_offset, table_size;
1241
1242         if (adev->asic_type == CHIP_CARRIZO)
1243                 max_me = 5;
1244
1245         /* write the cp table buffer */
1246         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1247         for (me = 0; me < max_me; me++) {
1248                 if (me == 0) {
1249                         const struct gfx_firmware_header_v1_0 *hdr =
1250                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1251                         fw_data = (const __le32 *)
1252                                 (adev->gfx.ce_fw->data +
1253                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1254                         table_offset = le32_to_cpu(hdr->jt_offset);
1255                         table_size = le32_to_cpu(hdr->jt_size);
1256                 } else if (me == 1) {
1257                         const struct gfx_firmware_header_v1_0 *hdr =
1258                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1259                         fw_data = (const __le32 *)
1260                                 (adev->gfx.pfp_fw->data +
1261                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1262                         table_offset = le32_to_cpu(hdr->jt_offset);
1263                         table_size = le32_to_cpu(hdr->jt_size);
1264                 } else if (me == 2) {
1265                         const struct gfx_firmware_header_v1_0 *hdr =
1266                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1267                         fw_data = (const __le32 *)
1268                                 (adev->gfx.me_fw->data +
1269                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1270                         table_offset = le32_to_cpu(hdr->jt_offset);
1271                         table_size = le32_to_cpu(hdr->jt_size);
1272                 } else if (me == 3) {
1273                         const struct gfx_firmware_header_v1_0 *hdr =
1274                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1275                         fw_data = (const __le32 *)
1276                                 (adev->gfx.mec_fw->data +
1277                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1278                         table_offset = le32_to_cpu(hdr->jt_offset);
1279                         table_size = le32_to_cpu(hdr->jt_size);
1280                 } else  if (me == 4) {
1281                         const struct gfx_firmware_header_v1_0 *hdr =
1282                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1283                         fw_data = (const __le32 *)
1284                                 (adev->gfx.mec2_fw->data +
1285                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1286                         table_offset = le32_to_cpu(hdr->jt_offset);
1287                         table_size = le32_to_cpu(hdr->jt_size);
1288                 }
1289
1290                 for (i = 0; i < table_size; i ++) {
1291                         dst_ptr[bo_offset + i] =
1292                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1293                 }
1294
1295                 bo_offset += table_size;
1296         }
1297 }
1298
1299 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1300 {
1301         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1302         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
1303 }
1304
1305 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1306 {
1307         volatile u32 *dst_ptr;
1308         u32 dws;
1309         const struct cs_section_def *cs_data;
1310         int r;
1311
1312         adev->gfx.rlc.cs_data = vi_cs_data;
1313
1314         cs_data = adev->gfx.rlc.cs_data;
1315
1316         if (cs_data) {
1317                 /* clear state block */
1318                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1319
1320                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1321                                               AMDGPU_GEM_DOMAIN_VRAM,
1322                                               &adev->gfx.rlc.clear_state_obj,
1323                                               &adev->gfx.rlc.clear_state_gpu_addr,
1324                                               (void **)&adev->gfx.rlc.cs_ptr);
1325                 if (r) {
1326                         dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1327                         gfx_v8_0_rlc_fini(adev);
1328                         return r;
1329                 }
1330
1331                 /* set up the cs buffer */
1332                 dst_ptr = adev->gfx.rlc.cs_ptr;
1333                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1334                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1335                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1336         }
1337
1338         if ((adev->asic_type == CHIP_CARRIZO) ||
1339             (adev->asic_type == CHIP_STONEY)) {
1340                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1341                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1342                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1343                                               &adev->gfx.rlc.cp_table_obj,
1344                                               &adev->gfx.rlc.cp_table_gpu_addr,
1345                                               (void **)&adev->gfx.rlc.cp_table_ptr);
1346                 if (r) {
1347                         dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1348                         return r;
1349                 }
1350
1351                 cz_init_cp_jump_table(adev);
1352
1353                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1354                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1355         }
1356
1357         return 0;
1358 }
1359
1360 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1361 {
1362         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1363 }
1364
1365 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1366 {
1367         int r;
1368         u32 *hpd;
1369         size_t mec_hpd_size;
1370
1371         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1372
1373         /* take ownership of the relevant compute queues */
1374         amdgpu_gfx_compute_queue_acquire(adev);
1375
1376         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1377
1378         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1379                                       AMDGPU_GEM_DOMAIN_GTT,
1380                                       &adev->gfx.mec.hpd_eop_obj,
1381                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1382                                       (void **)&hpd);
1383         if (r) {
1384                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1385                 return r;
1386         }
1387
1388         memset(hpd, 0, mec_hpd_size);
1389
1390         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1391         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1392
1393         return 0;
1394 }
1395
/*
 * GCN shader binary used by gfx_v8_0_do_edc_gpr_workarounds() to
 * initialize the VGPRs (presumably a run of v_mov_b32 writes followed
 * by a barrier/endpgm pair — confirm against the GCN3 ISA manual).
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1432
/*
 * GCN shader binary used by gfx_v8_0_do_edc_gpr_workarounds() to
 * initialize the SGPRs; the same binary is dispatched twice (SGPR1 and
 * SGPR2 passes) with different CU masks.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1457
/*
 * Register/value pairs programmed via SET_SH_REG before the VGPR-init
 * dispatch in gfx_v8_0_do_edc_gpr_workarounds().  All CUs on SE0 are
 * enabled (mask 0xffffffff) for this pass.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1477
/*
 * Register/value pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); only differs from sgpr2_init_regs
 * in the CU mask (lower nibble, 0x0f).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1497
/*
 * Register/value pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); CU mask covers the complementary
 * upper nibble (0xf0) relative to sgpr1_init_regs.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1517
/*
 * EDC SEC/DED error counter registers; gfx_v8_0_do_edc_gpr_workarounds()
 * reads each of these back once to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1546
/*
 * gfx_v8_0_do_edc_gpr_workarounds - GPR EDC init workaround (Carrizo only)
 *
 * Builds one IB containing three compute dispatches — a VGPR-init pass
 * and two SGPR-init passes over complementary CU masks (0x0f / 0xf0) —
 * runs it on the first compute ring with EDC disabled, then re-enables
 * DED_MODE/PROP_FED in GB_EDC_MODE and reads back every SEC/DED counter
 * register to clear it.
 *
 * Returns 0 on success (or when skipped on non-Carrizo / unready ring),
 * or a negative error code from IB allocation/submit/fence wait.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save GB_EDC_MODE and disable EDC while the init shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per pass: 3 dwords per reg/value pair + 4 (PGM_LO/HI packet)
	 * + 5 (dispatch packet) + 2 (event write), times 4 bytes/dword */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders into the tail of the IB, after the packets */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 — reuses the same SGPR shader at sgpr_offset, different CU mask */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED halt and FED propagation on top of the saved mode */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	/* NOTE(review): the trailing "| 1" sets bit 0 on top of clearing
	 * DIS_EDC via REG_SET_FIELD — looks intentional but undocumented;
	 * confirm against the CC_GC_EDC_CONFIG register spec */
	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1709
1710 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1711 {
1712         u32 gb_addr_config;
1713         u32 mc_shared_chmap, mc_arb_ramcfg;
1714         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1715         u32 tmp;
1716         int ret;
1717
1718         switch (adev->asic_type) {
1719         case CHIP_TOPAZ:
1720                 adev->gfx.config.max_shader_engines = 1;
1721                 adev->gfx.config.max_tile_pipes = 2;
1722                 adev->gfx.config.max_cu_per_sh = 6;
1723                 adev->gfx.config.max_sh_per_se = 1;
1724                 adev->gfx.config.max_backends_per_se = 2;
1725                 adev->gfx.config.max_texture_channel_caches = 2;
1726                 adev->gfx.config.max_gprs = 256;
1727                 adev->gfx.config.max_gs_threads = 32;
1728                 adev->gfx.config.max_hw_contexts = 8;
1729
1730                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1731                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1732                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1733                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1734                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1735                 break;
1736         case CHIP_FIJI:
1737                 adev->gfx.config.max_shader_engines = 4;
1738                 adev->gfx.config.max_tile_pipes = 16;
1739                 adev->gfx.config.max_cu_per_sh = 16;
1740                 adev->gfx.config.max_sh_per_se = 1;
1741                 adev->gfx.config.max_backends_per_se = 4;
1742                 adev->gfx.config.max_texture_channel_caches = 16;
1743                 adev->gfx.config.max_gprs = 256;
1744                 adev->gfx.config.max_gs_threads = 32;
1745                 adev->gfx.config.max_hw_contexts = 8;
1746
1747                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1748                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1749                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1750                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1751                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1752                 break;
1753         case CHIP_POLARIS11:
1754         case CHIP_POLARIS12:
1755                 ret = amdgpu_atombios_get_gfx_info(adev);
1756                 if (ret)
1757                         return ret;
1758                 adev->gfx.config.max_gprs = 256;
1759                 adev->gfx.config.max_gs_threads = 32;
1760                 adev->gfx.config.max_hw_contexts = 8;
1761
1762                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1763                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1764                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1765                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1766                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1767                 break;
1768         case CHIP_POLARIS10:
1769                 ret = amdgpu_atombios_get_gfx_info(adev);
1770                 if (ret)
1771                         return ret;
1772                 adev->gfx.config.max_gprs = 256;
1773                 adev->gfx.config.max_gs_threads = 32;
1774                 adev->gfx.config.max_hw_contexts = 8;
1775
1776                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1777                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1778                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1779                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1780                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1781                 break;
1782         case CHIP_TONGA:
1783                 adev->gfx.config.max_shader_engines = 4;
1784                 adev->gfx.config.max_tile_pipes = 8;
1785                 adev->gfx.config.max_cu_per_sh = 8;
1786                 adev->gfx.config.max_sh_per_se = 1;
1787                 adev->gfx.config.max_backends_per_se = 2;
1788                 adev->gfx.config.max_texture_channel_caches = 8;
1789                 adev->gfx.config.max_gprs = 256;
1790                 adev->gfx.config.max_gs_threads = 32;
1791                 adev->gfx.config.max_hw_contexts = 8;
1792
1793                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1794                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1795                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1796                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1797                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1798                 break;
1799         case CHIP_CARRIZO:
1800                 adev->gfx.config.max_shader_engines = 1;
1801                 adev->gfx.config.max_tile_pipes = 2;
1802                 adev->gfx.config.max_sh_per_se = 1;
1803                 adev->gfx.config.max_backends_per_se = 2;
1804                 adev->gfx.config.max_cu_per_sh = 8;
1805                 adev->gfx.config.max_texture_channel_caches = 2;
1806                 adev->gfx.config.max_gprs = 256;
1807                 adev->gfx.config.max_gs_threads = 32;
1808                 adev->gfx.config.max_hw_contexts = 8;
1809
1810                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1811                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1812                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1813                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1814                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1815                 break;
1816         case CHIP_STONEY:
1817                 adev->gfx.config.max_shader_engines = 1;
1818                 adev->gfx.config.max_tile_pipes = 2;
1819                 adev->gfx.config.max_sh_per_se = 1;
1820                 adev->gfx.config.max_backends_per_se = 1;
1821                 adev->gfx.config.max_cu_per_sh = 3;
1822                 adev->gfx.config.max_texture_channel_caches = 2;
1823                 adev->gfx.config.max_gprs = 256;
1824                 adev->gfx.config.max_gs_threads = 16;
1825                 adev->gfx.config.max_hw_contexts = 8;
1826
1827                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1828                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1829                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1830                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1831                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1832                 break;
1833         default:
1834                 adev->gfx.config.max_shader_engines = 2;
1835                 adev->gfx.config.max_tile_pipes = 4;
1836                 adev->gfx.config.max_cu_per_sh = 2;
1837                 adev->gfx.config.max_sh_per_se = 1;
1838                 adev->gfx.config.max_backends_per_se = 2;
1839                 adev->gfx.config.max_texture_channel_caches = 4;
1840                 adev->gfx.config.max_gprs = 256;
1841                 adev->gfx.config.max_gs_threads = 32;
1842                 adev->gfx.config.max_hw_contexts = 8;
1843
1844                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1845                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1846                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1847                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1848                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1849                 break;
1850         }
1851
1852         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1853         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1854         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1855
1856         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1857         adev->gfx.config.mem_max_burst_length_bytes = 256;
1858         if (adev->flags & AMD_IS_APU) {
1859                 /* Get memory bank mapping mode. */
1860                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1861                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1862                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1863
1864                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1865                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1866                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1867
1868                 /* Validate settings in case only one DIMM installed. */
1869                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1870                         dimm00_addr_map = 0;
1871                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1872                         dimm01_addr_map = 0;
1873                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1874                         dimm10_addr_map = 0;
1875                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1876                         dimm11_addr_map = 0;
1877
1878                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1879                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1880                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1881                         adev->gfx.config.mem_row_size_in_kb = 2;
1882                 else
1883                         adev->gfx.config.mem_row_size_in_kb = 1;
1884         } else {
1885                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1886                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1887                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1888                         adev->gfx.config.mem_row_size_in_kb = 4;
1889         }
1890
1891         adev->gfx.config.shader_engine_tile_size = 32;
1892         adev->gfx.config.num_gpus = 1;
1893         adev->gfx.config.multi_gpu_tile_size = 64;
1894
1895         /* fix up row size */
1896         switch (adev->gfx.config.mem_row_size_in_kb) {
1897         case 1:
1898         default:
1899                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1900                 break;
1901         case 2:
1902                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1903                 break;
1904         case 4:
1905                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1906                 break;
1907         }
1908         adev->gfx.config.gb_addr_config = gb_addr_config;
1909
1910         return 0;
1911 }
1912
1913 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1914                                         int mec, int pipe, int queue)
1915 {
1916         int r;
1917         unsigned irq_type;
1918         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1919
1920         ring = &adev->gfx.compute_ring[ring_id];
1921
1922         /* mec0 is me1 */
1923         ring->me = mec + 1;
1924         ring->pipe = pipe;
1925         ring->queue = queue;
1926
1927         ring->ring_obj = NULL;
1928         ring->use_doorbell = true;
1929         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1930         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1931                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1932         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1933
1934         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1935                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1936                 + ring->pipe;
1937
1938         /* type-2 packets are deprecated on MEC, use type-3 instead */
1939         r = amdgpu_ring_init(adev, ring, 1024,
1940                         &adev->gfx.eop_irq, irq_type);
1941         if (r)
1942                 return r;
1943
1944
1945         return 0;
1946 }
1947
1948 static int gfx_v8_0_sw_init(void *handle)
1949 {
1950         int i, j, k, r, ring_id;
1951         struct amdgpu_ring *ring;
1952         struct amdgpu_kiq *kiq;
1953         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1954
1955         switch (adev->asic_type) {
1956         case CHIP_FIJI:
1957         case CHIP_TONGA:
1958         case CHIP_POLARIS11:
1959         case CHIP_POLARIS12:
1960         case CHIP_POLARIS10:
1961         case CHIP_CARRIZO:
1962                 adev->gfx.mec.num_mec = 2;
1963                 break;
1964         case CHIP_TOPAZ:
1965         case CHIP_STONEY:
1966         default:
1967                 adev->gfx.mec.num_mec = 1;
1968                 break;
1969         }
1970
1971         adev->gfx.mec.num_pipe_per_mec = 4;
1972         adev->gfx.mec.num_queue_per_pipe = 8;
1973
1974         /* KIQ event */
1975         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
1976         if (r)
1977                 return r;
1978
1979         /* EOP Event */
1980         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
1981         if (r)
1982                 return r;
1983
1984         /* Privileged reg */
1985         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
1986                               &adev->gfx.priv_reg_irq);
1987         if (r)
1988                 return r;
1989
1990         /* Privileged inst */
1991         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
1992                               &adev->gfx.priv_inst_irq);
1993         if (r)
1994                 return r;
1995
1996         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1997
1998         gfx_v8_0_scratch_init(adev);
1999
2000         r = gfx_v8_0_init_microcode(adev);
2001         if (r) {
2002                 DRM_ERROR("Failed to load gfx firmware!\n");
2003                 return r;
2004         }
2005
2006         r = gfx_v8_0_rlc_init(adev);
2007         if (r) {
2008                 DRM_ERROR("Failed to init rlc BOs!\n");
2009                 return r;
2010         }
2011
2012         r = gfx_v8_0_mec_init(adev);
2013         if (r) {
2014                 DRM_ERROR("Failed to init MEC BOs!\n");
2015                 return r;
2016         }
2017
2018         /* set up the gfx ring */
2019         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2020                 ring = &adev->gfx.gfx_ring[i];
2021                 ring->ring_obj = NULL;
2022                 sprintf(ring->name, "gfx");
2023                 /* no gfx doorbells on iceland */
2024                 if (adev->asic_type != CHIP_TOPAZ) {
2025                         ring->use_doorbell = true;
2026                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2027                 }
2028
2029                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2030                                      AMDGPU_CP_IRQ_GFX_EOP);
2031                 if (r)
2032                         return r;
2033         }
2034
2035
2036         /* set up the compute queues - allocate horizontally across pipes */
2037         ring_id = 0;
2038         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2039                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2040                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2041                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2042                                         continue;
2043
2044                                 r = gfx_v8_0_compute_ring_init(adev,
2045                                                                 ring_id,
2046                                                                 i, k, j);
2047                                 if (r)
2048                                         return r;
2049
2050                                 ring_id++;
2051                         }
2052                 }
2053         }
2054
2055         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2056         if (r) {
2057                 DRM_ERROR("Failed to init KIQ BOs!\n");
2058                 return r;
2059         }
2060
2061         kiq = &adev->gfx.kiq;
2062         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2063         if (r)
2064                 return r;
2065
2066         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2067         r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2068         if (r)
2069                 return r;
2070
2071         /* reserve GDS, GWS and OA resource for gfx */
2072         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2073                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2074                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2075         if (r)
2076                 return r;
2077
2078         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2079                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2080                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2081         if (r)
2082                 return r;
2083
2084         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2085                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2086                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2087         if (r)
2088                 return r;
2089
2090         adev->gfx.ce_ram_size = 0x8000;
2091
2092         r = gfx_v8_0_gpu_early_init(adev);
2093         if (r)
2094                 return r;
2095
2096         return 0;
2097 }
2098
2099 static int gfx_v8_0_sw_fini(void *handle)
2100 {
2101         int i;
2102         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2103
2104         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2105         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2106         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2107
2108         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2109                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2110         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2111                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2112
2113         amdgpu_gfx_compute_mqd_sw_fini(adev);
2114         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2115         amdgpu_gfx_kiq_fini(adev);
2116         amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL);
2117
2118         gfx_v8_0_mec_fini(adev);
2119         gfx_v8_0_rlc_fini(adev);
2120         gfx_v8_0_free_microcode(adev);
2121
2122         return 0;
2123 }
2124
2125 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2126 {
2127         uint32_t *modearray, *mod2array;
2128         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2129         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2130         u32 reg_offset;
2131
2132         modearray = adev->gfx.config.tile_mode_array;
2133         mod2array = adev->gfx.config.macrotile_mode_array;
2134
2135         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2136                 modearray[reg_offset] = 0;
2137
2138         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2139                 mod2array[reg_offset] = 0;
2140
2141         switch (adev->asic_type) {
2142         case CHIP_TOPAZ:
2143                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2144                                 PIPE_CONFIG(ADDR_SURF_P2) |
2145                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2146                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2147                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2148                                 PIPE_CONFIG(ADDR_SURF_P2) |
2149                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2150                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2151                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152                                 PIPE_CONFIG(ADDR_SURF_P2) |
2153                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2154                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2155                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2156                                 PIPE_CONFIG(ADDR_SURF_P2) |
2157                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2158                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2159                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160                                 PIPE_CONFIG(ADDR_SURF_P2) |
2161                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2162                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2163                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2164                                 PIPE_CONFIG(ADDR_SURF_P2) |
2165                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2166                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2167                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2168                                 PIPE_CONFIG(ADDR_SURF_P2) |
2169                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2170                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2171                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2172                                 PIPE_CONFIG(ADDR_SURF_P2));
2173                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2174                                 PIPE_CONFIG(ADDR_SURF_P2) |
2175                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2176                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2177                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2178                                  PIPE_CONFIG(ADDR_SURF_P2) |
2179                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2180                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2181                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2182                                  PIPE_CONFIG(ADDR_SURF_P2) |
2183                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2184                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2185                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2186                                  PIPE_CONFIG(ADDR_SURF_P2) |
2187                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2188                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2190                                  PIPE_CONFIG(ADDR_SURF_P2) |
2191                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2193                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2194                                  PIPE_CONFIG(ADDR_SURF_P2) |
2195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2197                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2198                                  PIPE_CONFIG(ADDR_SURF_P2) |
2199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2201                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2202                                  PIPE_CONFIG(ADDR_SURF_P2) |
2203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2205                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2206                                  PIPE_CONFIG(ADDR_SURF_P2) |
2207                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2208                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2209                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2210                                  PIPE_CONFIG(ADDR_SURF_P2) |
2211                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2212                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2213                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2214                                  PIPE_CONFIG(ADDR_SURF_P2) |
2215                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2216                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2217                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2218                                  PIPE_CONFIG(ADDR_SURF_P2) |
2219                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2220                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2221                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2222                                  PIPE_CONFIG(ADDR_SURF_P2) |
2223                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2224                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2225                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2226                                  PIPE_CONFIG(ADDR_SURF_P2) |
2227                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2228                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2229                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2230                                  PIPE_CONFIG(ADDR_SURF_P2) |
2231                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2232                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2233                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2234                                  PIPE_CONFIG(ADDR_SURF_P2) |
2235                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2236                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2237                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2238                                  PIPE_CONFIG(ADDR_SURF_P2) |
2239                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2240                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2241                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2242                                  PIPE_CONFIG(ADDR_SURF_P2) |
2243                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2244                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2245
2246                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2247                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2248                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2249                                 NUM_BANKS(ADDR_SURF_8_BANK));
2250                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2251                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2252                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2253                                 NUM_BANKS(ADDR_SURF_8_BANK));
2254                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2255                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2256                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257                                 NUM_BANKS(ADDR_SURF_8_BANK));
2258                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2261                                 NUM_BANKS(ADDR_SURF_8_BANK));
2262                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2263                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2264                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2265                                 NUM_BANKS(ADDR_SURF_8_BANK));
2266                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2268                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269                                 NUM_BANKS(ADDR_SURF_8_BANK));
2270                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2273                                 NUM_BANKS(ADDR_SURF_8_BANK));
2274                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2275                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2276                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2277                                 NUM_BANKS(ADDR_SURF_16_BANK));
2278                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2279                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2280                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2281                                 NUM_BANKS(ADDR_SURF_16_BANK));
2282                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2283                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2284                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285                                  NUM_BANKS(ADDR_SURF_16_BANK));
2286                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2287                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2288                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2289                                  NUM_BANKS(ADDR_SURF_16_BANK));
2290                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2291                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2292                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2293                                  NUM_BANKS(ADDR_SURF_16_BANK));
2294                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2296                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297                                  NUM_BANKS(ADDR_SURF_16_BANK));
2298                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2299                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2300                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2301                                  NUM_BANKS(ADDR_SURF_8_BANK));
2302
2303                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2304                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2305                             reg_offset != 23)
2306                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2307
2308                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2309                         if (reg_offset != 7)
2310                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2311
2312                 break;
2313         case CHIP_FIJI:
2314                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2317                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2321                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2325                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2326                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2329                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2333                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2335                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2337                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2338                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2339                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2341                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2342                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2343                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2344                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2345                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2346                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2347                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2348                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2349                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2351                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2352                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2353                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2356                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2357                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2360                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2361                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2364                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2365                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2369                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2372                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2373                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2376                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2377                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2380                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2381                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2382                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2383                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2384                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2385                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2389                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2393                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2397                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2401                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2403                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2405                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2406                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2407                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2408                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2409                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2411                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2412                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2413                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2415                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2416                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2417                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2419                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2420                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2423                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2424                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2425                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2427                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2429                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2431                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2432                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2433                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2434                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2436
2437                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2439                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2440                                 NUM_BANKS(ADDR_SURF_8_BANK));
2441                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2443                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444                                 NUM_BANKS(ADDR_SURF_8_BANK));
2445                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448                                 NUM_BANKS(ADDR_SURF_8_BANK));
2449                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                                 NUM_BANKS(ADDR_SURF_8_BANK));
2453                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2455                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456                                 NUM_BANKS(ADDR_SURF_8_BANK));
2457                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460                                 NUM_BANKS(ADDR_SURF_8_BANK));
2461                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464                                 NUM_BANKS(ADDR_SURF_8_BANK));
2465                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2467                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2468                                 NUM_BANKS(ADDR_SURF_8_BANK));
2469                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2471                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2472                                 NUM_BANKS(ADDR_SURF_8_BANK));
2473                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2475                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476                                  NUM_BANKS(ADDR_SURF_8_BANK));
2477                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2479                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2480                                  NUM_BANKS(ADDR_SURF_8_BANK));
2481                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2483                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484                                  NUM_BANKS(ADDR_SURF_8_BANK));
2485                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2488                                  NUM_BANKS(ADDR_SURF_8_BANK));
2489                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492                                  NUM_BANKS(ADDR_SURF_4_BANK));
2493
2494                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2495                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2496
2497                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2498                         if (reg_offset != 7)
2499                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2500
2501                 break;
2502         case CHIP_TONGA:
2503                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2506                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2510                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2514                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2515                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2518                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2520                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2522                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2524                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2525                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2526                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2527                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2528                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2530                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2531                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2532                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2533                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2534                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2535                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2536                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2537                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2538                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2540                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2541                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2544                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2545                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2546                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2548                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2549                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2550                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2552                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2553                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2554                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2556                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2558                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2560                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2562                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2564                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2565                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2566                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2569                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2570                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2571                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2572                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2573                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2574                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2576                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2578                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2582                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2586                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2588                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2589                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2590                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2592                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2593                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2594                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2595                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2596                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2597                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2598                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2600                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2601                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2602                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2604                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2605                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2606                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2608                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2609                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2610                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2612                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2613                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2616                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2617                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2618                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2620                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2621                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2622                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2623                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2624                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2625
2626                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2628                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2629                                 NUM_BANKS(ADDR_SURF_16_BANK));
2630                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2632                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633                                 NUM_BANKS(ADDR_SURF_16_BANK));
2634                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2636                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2637                                 NUM_BANKS(ADDR_SURF_16_BANK));
2638                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2640                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2641                                 NUM_BANKS(ADDR_SURF_16_BANK));
2642                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2644                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2645                                 NUM_BANKS(ADDR_SURF_16_BANK));
2646                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2648                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2649                                 NUM_BANKS(ADDR_SURF_16_BANK));
2650                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2653                                 NUM_BANKS(ADDR_SURF_16_BANK));
2654                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2656                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2657                                 NUM_BANKS(ADDR_SURF_16_BANK));
2658                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2660                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2661                                 NUM_BANKS(ADDR_SURF_16_BANK));
2662                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2664                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2665                                  NUM_BANKS(ADDR_SURF_16_BANK));
2666                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2669                                  NUM_BANKS(ADDR_SURF_16_BANK));
2670                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2672                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2673                                  NUM_BANKS(ADDR_SURF_8_BANK));
2674                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2676                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2677                                  NUM_BANKS(ADDR_SURF_4_BANK));
2678                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2681                                  NUM_BANKS(ADDR_SURF_4_BANK));
2682
2683                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2684                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2685
2686                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2687                         if (reg_offset != 7)
2688                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2689
2690                 break;
2691         case CHIP_POLARIS11:
2692         case CHIP_POLARIS12:
2693                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2696                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2700                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2704                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2705                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2708                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2712                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2713                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2714                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2716                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2717                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2718                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2720                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2721                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2722                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2724                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2725                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2726                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2727                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2730                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2734                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2736                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2738                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2739                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2743                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2747                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2751                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2755                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2759                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2763                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2776                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2778                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2780                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2782                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2784                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2786                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2787                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2788                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2791                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2792                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2794                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2795                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2796                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2798                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2799                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2800                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2802                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2803                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2806                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2810                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2811                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2812                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2814                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2815
2816                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2818                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2819                                 NUM_BANKS(ADDR_SURF_16_BANK));
2820
2821                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2823                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824                                 NUM_BANKS(ADDR_SURF_16_BANK));
2825
2826                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2828                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2829                                 NUM_BANKS(ADDR_SURF_16_BANK));
2830
2831                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2833                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2834                                 NUM_BANKS(ADDR_SURF_16_BANK));
2835
2836                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2839                                 NUM_BANKS(ADDR_SURF_16_BANK));
2840
2841                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2844                                 NUM_BANKS(ADDR_SURF_16_BANK));
2845
2846                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2848                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2849                                 NUM_BANKS(ADDR_SURF_16_BANK));
2850
2851                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2852                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2853                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2854                                 NUM_BANKS(ADDR_SURF_16_BANK));
2855
2856                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2857                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2858                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859                                 NUM_BANKS(ADDR_SURF_16_BANK));
2860
2861                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2863                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2864                                 NUM_BANKS(ADDR_SURF_16_BANK));
2865
2866                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2868                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2869                                 NUM_BANKS(ADDR_SURF_16_BANK));
2870
2871                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2872                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2873                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2874                                 NUM_BANKS(ADDR_SURF_16_BANK));
2875
2876                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879                                 NUM_BANKS(ADDR_SURF_8_BANK));
2880
2881                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2882                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2883                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2884                                 NUM_BANKS(ADDR_SURF_4_BANK));
2885
2886                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2887                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2888
2889                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2890                         if (reg_offset != 7)
2891                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2892
2893                 break;
2894         case CHIP_POLARIS10:
2895                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2916                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2920                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2923                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2925                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2926                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2927                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2928                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2929                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2932                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2934                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2937                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2938                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2940                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2941                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2942                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2945                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2946                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2953                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2954                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2957                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2961                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2962                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2965                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2970                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2974                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2978                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2980                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2982                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2984                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2986                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2987                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2990                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2992                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2994                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2996                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2997                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2998                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3000                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3001                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3002                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3004                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3005                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3009                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3010                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3012                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3013                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3014                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3015                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3016                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3017
3018                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3019                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3020                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3021                                 NUM_BANKS(ADDR_SURF_16_BANK));
3022
3023                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3025                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026                                 NUM_BANKS(ADDR_SURF_16_BANK));
3027
3028                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3030                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031                                 NUM_BANKS(ADDR_SURF_16_BANK));
3032
3033                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3035                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3036                                 NUM_BANKS(ADDR_SURF_16_BANK));
3037
3038                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3040                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3041                                 NUM_BANKS(ADDR_SURF_16_BANK));
3042
3043                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3045                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3046                                 NUM_BANKS(ADDR_SURF_16_BANK));
3047
3048                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3049                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3050                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3051                                 NUM_BANKS(ADDR_SURF_16_BANK));
3052
3053                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3055                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3056                                 NUM_BANKS(ADDR_SURF_16_BANK));
3057
3058                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3060                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3061                                 NUM_BANKS(ADDR_SURF_16_BANK));
3062
3063                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3065                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3066                                 NUM_BANKS(ADDR_SURF_16_BANK));
3067
3068                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3070                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071                                 NUM_BANKS(ADDR_SURF_16_BANK));
3072
3073                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3075                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3076                                 NUM_BANKS(ADDR_SURF_8_BANK));
3077
3078                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3080                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3081                                 NUM_BANKS(ADDR_SURF_4_BANK));
3082
3083                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3085                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3086                                 NUM_BANKS(ADDR_SURF_4_BANK));
3087
3088                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3089                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3090
3091                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3092                         if (reg_offset != 7)
3093                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3094
3095                 break;
3096         case CHIP_STONEY:
3097                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3098                                 PIPE_CONFIG(ADDR_SURF_P2) |
3099                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3100                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3102                                 PIPE_CONFIG(ADDR_SURF_P2) |
3103                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3104                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3105                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106                                 PIPE_CONFIG(ADDR_SURF_P2) |
3107                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3108                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3109                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3110                                 PIPE_CONFIG(ADDR_SURF_P2) |
3111                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3112                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3114                                 PIPE_CONFIG(ADDR_SURF_P2) |
3115                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3116                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3118                                 PIPE_CONFIG(ADDR_SURF_P2) |
3119                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3120                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3121                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3122                                 PIPE_CONFIG(ADDR_SURF_P2) |
3123                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3124                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3125                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3126                                 PIPE_CONFIG(ADDR_SURF_P2));
3127                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3128                                 PIPE_CONFIG(ADDR_SURF_P2) |
3129                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3130                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3131                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3132                                  PIPE_CONFIG(ADDR_SURF_P2) |
3133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3135                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3136                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3138                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3139                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3140                                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3142                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3147                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3151                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3155                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3167                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3168                                  PIPE_CONFIG(ADDR_SURF_P2) |
3169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3172                                  PIPE_CONFIG(ADDR_SURF_P2) |
3173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3176                                  PIPE_CONFIG(ADDR_SURF_P2) |
3177                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3178                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3180                                  PIPE_CONFIG(ADDR_SURF_P2) |
3181                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3182                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3183                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3184                                  PIPE_CONFIG(ADDR_SURF_P2) |
3185                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3186                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3187                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3188                                  PIPE_CONFIG(ADDR_SURF_P2) |
3189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3191                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3192                                  PIPE_CONFIG(ADDR_SURF_P2) |
3193                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3194                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3195                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3196                                  PIPE_CONFIG(ADDR_SURF_P2) |
3197                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3198                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3199
3200                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3202                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3203                                 NUM_BANKS(ADDR_SURF_8_BANK));
3204                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3206                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3207                                 NUM_BANKS(ADDR_SURF_8_BANK));
3208                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3210                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3211                                 NUM_BANKS(ADDR_SURF_8_BANK));
3212                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3214                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3215                                 NUM_BANKS(ADDR_SURF_8_BANK));
3216                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3219                                 NUM_BANKS(ADDR_SURF_8_BANK));
3220                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3222                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3223                                 NUM_BANKS(ADDR_SURF_8_BANK));
3224                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3227                                 NUM_BANKS(ADDR_SURF_8_BANK));
3228                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3229                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3230                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3231                                 NUM_BANKS(ADDR_SURF_16_BANK));
3232                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3233                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3234                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235                                 NUM_BANKS(ADDR_SURF_16_BANK));
3236                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3237                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3238                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3239                                  NUM_BANKS(ADDR_SURF_16_BANK));
3240                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3241                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3242                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243                                  NUM_BANKS(ADDR_SURF_16_BANK));
3244                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3246                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247                                  NUM_BANKS(ADDR_SURF_16_BANK));
3248                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3249                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3250                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3251                                  NUM_BANKS(ADDR_SURF_16_BANK));
3252                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3253                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3254                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3255                                  NUM_BANKS(ADDR_SURF_8_BANK));
3256
3257                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3258                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3259                             reg_offset != 23)
3260                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3261
3262                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3263                         if (reg_offset != 7)
3264                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3265
3266                 break;
3267         default:
3268                 dev_warn(adev->dev,
3269                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3270                          adev->asic_type);
3271
3272         case CHIP_CARRIZO:
3273                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3274                                 PIPE_CONFIG(ADDR_SURF_P2) |
3275                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3276                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3277                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3278                                 PIPE_CONFIG(ADDR_SURF_P2) |
3279                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3280                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3281                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3282                                 PIPE_CONFIG(ADDR_SURF_P2) |
3283                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3284                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3285                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3286                                 PIPE_CONFIG(ADDR_SURF_P2) |
3287                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3288                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3289                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3290                                 PIPE_CONFIG(ADDR_SURF_P2) |
3291                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3292                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3293                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3294                                 PIPE_CONFIG(ADDR_SURF_P2) |
3295                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3296                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3297                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3298                                 PIPE_CONFIG(ADDR_SURF_P2) |
3299                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3300                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3301                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3302                                 PIPE_CONFIG(ADDR_SURF_P2));
3303                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3304                                 PIPE_CONFIG(ADDR_SURF_P2) |
3305                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3306                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3307                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3308                                  PIPE_CONFIG(ADDR_SURF_P2) |
3309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3311                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3312                                  PIPE_CONFIG(ADDR_SURF_P2) |
3313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3315                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3316                                  PIPE_CONFIG(ADDR_SURF_P2) |
3317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3320                                  PIPE_CONFIG(ADDR_SURF_P2) |
3321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3323                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3324                                  PIPE_CONFIG(ADDR_SURF_P2) |
3325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3327                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3328                                  PIPE_CONFIG(ADDR_SURF_P2) |
3329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3331                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3332                                  PIPE_CONFIG(ADDR_SURF_P2) |
3333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3335                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3336                                  PIPE_CONFIG(ADDR_SURF_P2) |
3337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3339                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3340                                  PIPE_CONFIG(ADDR_SURF_P2) |
3341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3343                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3344                                  PIPE_CONFIG(ADDR_SURF_P2) |
3345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3348                                  PIPE_CONFIG(ADDR_SURF_P2) |
3349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3351                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3352                                  PIPE_CONFIG(ADDR_SURF_P2) |
3353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3355                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3356                                  PIPE_CONFIG(ADDR_SURF_P2) |
3357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3359                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3360                                  PIPE_CONFIG(ADDR_SURF_P2) |
3361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3363                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3364                                  PIPE_CONFIG(ADDR_SURF_P2) |
3365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3367                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3368                                  PIPE_CONFIG(ADDR_SURF_P2) |
3369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3371                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3372                                  PIPE_CONFIG(ADDR_SURF_P2) |
3373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3375
3376                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3378                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379                                 NUM_BANKS(ADDR_SURF_8_BANK));
3380                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3381                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3382                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3383                                 NUM_BANKS(ADDR_SURF_8_BANK));
3384                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3386                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3387                                 NUM_BANKS(ADDR_SURF_8_BANK));
3388                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3390                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3391                                 NUM_BANKS(ADDR_SURF_8_BANK));
3392                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3394                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3395                                 NUM_BANKS(ADDR_SURF_8_BANK));
3396                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3397                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3398                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3399                                 NUM_BANKS(ADDR_SURF_8_BANK));
3400                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3401                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3402                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3403                                 NUM_BANKS(ADDR_SURF_8_BANK));
3404                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3405                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3406                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3407                                 NUM_BANKS(ADDR_SURF_16_BANK));
3408                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3409                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3410                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3411                                 NUM_BANKS(ADDR_SURF_16_BANK));
3412                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3413                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3414                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3415                                  NUM_BANKS(ADDR_SURF_16_BANK));
3416                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3417                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3418                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3419                                  NUM_BANKS(ADDR_SURF_16_BANK));
3420                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3421                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3422                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3423                                  NUM_BANKS(ADDR_SURF_16_BANK));
3424                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3425                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3426                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3427                                  NUM_BANKS(ADDR_SURF_16_BANK));
3428                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3429                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3430                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3431                                  NUM_BANKS(ADDR_SURF_8_BANK));
3432
3433                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3434                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3435                             reg_offset != 23)
3436                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3437
3438                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3439                         if (reg_offset != 7)
3440                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3441
3442                 break;
3443         }
3444 }
3445
3446 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3447                                   u32 se_num, u32 sh_num, u32 instance)
3448 {
3449         u32 data;
3450
3451         if (instance == 0xffffffff)
3452                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3453         else
3454                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3455
3456         if (se_num == 0xffffffff)
3457                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3458         else
3459                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3460
3461         if (sh_num == 0xffffffff)
3462                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3463         else
3464                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3465
3466         WREG32(mmGRBM_GFX_INDEX, data);
3467 }
3468
3469 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3470 {
3471         u32 data, mask;
3472
3473         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3474                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3475
3476         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3477
3478         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3479                                          adev->gfx.config.max_sh_per_se);
3480
3481         return (~data) & mask;
3482 }
3483
3484 static void
3485 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3486 {
3487         switch (adev->asic_type) {
3488         case CHIP_FIJI:
3489                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3490                           RB_XSEL2(1) | PKR_MAP(2) |
3491                           PKR_XSEL(1) | PKR_YSEL(1) |
3492                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3493                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3494                            SE_PAIR_YSEL(2);
3495                 break;
3496         case CHIP_TONGA:
3497         case CHIP_POLARIS10:
3498                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3499                           SE_XSEL(1) | SE_YSEL(1);
3500                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3501                            SE_PAIR_YSEL(2);
3502                 break;
3503         case CHIP_TOPAZ:
3504         case CHIP_CARRIZO:
3505                 *rconf |= RB_MAP_PKR0(2);
3506                 *rconf1 |= 0x0;
3507                 break;
3508         case CHIP_POLARIS11:
3509         case CHIP_POLARIS12:
3510                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3511                           SE_XSEL(1) | SE_YSEL(1);
3512                 *rconf1 |= 0x0;
3513                 break;
3514         case CHIP_STONEY:
3515                 *rconf |= 0x0;
3516                 *rconf1 |= 0x0;
3517                 break;
3518         default:
3519                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3520                 break;
3521         }
3522 }
3523
/*
 * gfx_v8_0_write_harvested_raster_configs - program raster config on harvested parts
 *
 * @adev: amdgpu_device pointer
 * @raster_config: base PA_SC_RASTER_CONFIG value
 * @raster_config_1: base PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmap of enabled render backends
 * @num_rb: number of physically possible render backends
 *
 * When some RBs are harvested (disabled), each shader engine may need a
 * different SE/PKR/RB mapping.  Build a per-SE raster config from the
 * base value and write it with GRBM_GFX_INDEX steered at that SE.
 * Caller must hold adev->grbm_idx_mutex.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice the global RB mask into one contiguous mask per SE. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* Only these topologies are expected on VI parts. */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If a whole SE pair is empty, remap SE_PAIR to the live pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* One SE of this pair is empty: remap SE_MAP to the live one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* One packer of this SE is empty: remap PKR_MAP. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Within each packer, point RB_MAP at a live RB. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3632
/*
 * gfx_v8_0_setup_rb - discover active render backends and program raster config
 *
 * @adev: amdgpu_device pointer
 *
 * Walks every SE/SH to collect the active-RB bitmap, caches it in
 * adev->gfx.config, then writes PA_SC_RASTER_CONFIG/_1 either globally
 * (no harvesting) or per-SE via the harvested path.  Finally caches the
 * per-SE/SH register values for userspace queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	/* number of bitmap bits contributed by each SH */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Gather the active-RB bits of every SE/SH into one bitmap. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* All RBs present (or none): broadcast the default config.
	 * Otherwise program a remapped config per shader engine. */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3689
/**
 * gfx_v8_0_init_compute_vmid - initialize SH_MEM registers for compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers: program SH_MEM_CONFIG and
 * SH_MEM_BASES for VMIDs 8..15 (reserved for compute/HSA) so they use
 * the 64-bit HSA address mode with the apertures described below.
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		/* Steer the SH_MEM writes at this VMID via SRBM. */
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3734
3735 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3736 {
3737         switch (adev->asic_type) {
3738         default:
3739                 adev->gfx.config.double_offchip_lds_buf = 1;
3740                 break;
3741         case CHIP_CARRIZO:
3742         case CHIP_STONEY:
3743                 adev->gfx.config.double_offchip_lds_buf = 0;
3744                 break;
3745         }
3746 }
3747
/*
 * gfx_v8_0_gpu_init - one-time gfx block initialization
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the address config, tiling tables, RB setup, SH_MEM registers
 * for every VMID, the compute VMID apertures, and the broadcast PA_SC /
 * SPI arbitration settings.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	/* Program SH_MEM_CONFIG/BASES for every graphics VMID. */
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel): uncached default mtype, bases at 0. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* User VMIDs: non-coherent default mtype, shared aperture base. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* Give all four pipe-order timestamps equal SPI arbitration priority. */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3829
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for RLC serdes masters to go idle
 *
 * @adev: amdgpu_device pointer
 *
 * Polls the CU master busy bit on every SE/SH, then the non-CU master
 * busy bits, each with a bounded udelay(1) poll loop (adev->usec_timeout
 * iterations).  Times out silently rather than erroring.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Now wait for the non-CU (SE/GC/TC) serdes masters. */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3859
3860 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3861                                                bool enable)
3862 {
3863         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3864
3865         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3866         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3867         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3868         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3869
3870         WREG32(mmCP_INT_CNTL_RING0, tmp);
3871 }
3872
3873 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3874 {
3875         /* csib */
3876         WREG32(mmRLC_CSIB_ADDR_HI,
3877                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3878         WREG32(mmRLC_CSIB_ADDR_LO,
3879                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3880         WREG32(mmRLC_CSIB_LENGTH,
3881                         adev->gfx.rlc.clear_state_size);
3882 }
3883
3884 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3885                                 int ind_offset,
3886                                 int list_size,
3887                                 int *unique_indices,
3888                                 int *indices_count,
3889                                 int max_indices,
3890                                 int *ind_start_offsets,
3891                                 int *offset_count,
3892                                 int max_offset)
3893 {
3894         int indices;
3895         bool new_entry = true;
3896
3897         for (; ind_offset < list_size; ind_offset++) {
3898
3899                 if (new_entry) {
3900                         new_entry = false;
3901                         ind_start_offsets[*offset_count] = ind_offset;
3902                         *offset_count = *offset_count + 1;
3903                         BUG_ON(*offset_count >= max_offset);
3904                 }
3905
3906                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3907                         new_entry = true;
3908                         continue;
3909                 }
3910
3911                 ind_offset += 2;
3912
3913                 /* look for the matching indice */
3914                 for (indices = 0;
3915                         indices < *indices_count;
3916                         indices++) {
3917                         if (unique_indices[indices] ==
3918                                 register_list_format[ind_offset])
3919                                 break;
3920                 }
3921
3922                 if (indices >= *indices_count) {
3923                         unique_indices[*indices_count] =
3924                                 register_list_format[ind_offset];
3925                         indices = *indices_count;
3926                         *indices_count = *indices_count + 1;
3927                         BUG_ON(*indices_count >= max_indices);
3928                 }
3929
3930                 register_list_format[ind_offset] = indices;
3931         }
3932 }
3933
/*
 * gfx_v8_0_init_save_restore_list - upload the RLC save/restore lists
 *
 * @adev: amdgpu_device pointer
 *
 * Copies the firmware register-list-format table, compresses its
 * indirect section via gfx_v8_0_parse_ind_reg_list(), then uploads the
 * direct save/restore list to ARAM and the indirect list, its size and
 * the per-entry starting offsets to GPM scratch, and finally programs
 * the unique index-control registers.
 *
 * Returns 0 on success or -ENOMEM if the temporary copy allocation fails.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parsing rewrites offsets into indices in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* upload the save and restore list to RLC ARAM (auto-increment) */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* upload the indirect list to GPM scratch */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* write the restore-list size, in (dword-pair) units */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* write the per-entry starting offsets */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* program the unique index-control address/data register pairs */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
3997
/* Turn on the RLC save/restore machine (SRM), which replays the
 * register save/restore list previously programmed into the RLC. */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4002
/* Program the basic power-gating timing parameters: the CP write
 * pointer idle poll interval, the RLC power up/down/propagate/mem-sleep
 * delays, the serdes command delay and the GFX idle threshold that
 * triggers the automatic GRBM register save. */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* all four RLC_PG_DELAY sub-delays use the same 0x10 value */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4019
/* Enable/disable SMU clock slowdown during power up (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4025
/* Enable/disable SMU clock slowdown during power down (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4031
/* Enable/disable CP power gating.  Note the field is CP_PG_DISABLE,
 * so its sense is inverted: enable==true clears the bit. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4036
4037 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4038 {
4039         if ((adev->asic_type == CHIP_CARRIZO) ||
4040             (adev->asic_type == CHIP_STONEY)) {
4041                 gfx_v8_0_init_csb(adev);
4042                 gfx_v8_0_init_save_restore_list(adev);
4043                 gfx_v8_0_enable_save_restore_machine(adev);
4044                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4045                 gfx_v8_0_init_power_gating(adev);
4046                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4047         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4048                    (adev->asic_type == CHIP_POLARIS12)) {
4049                 gfx_v8_0_init_csb(adev);
4050                 gfx_v8_0_init_save_restore_list(adev);
4051                 gfx_v8_0_enable_save_restore_machine(adev);
4052                 gfx_v8_0_init_power_gating(adev);
4053         }
4054
4055 }
4056
/* Halt the RLC (clear the F32 enable bit), mask the GUI idle interrupt
 * and wait for the RLC serdes to go idle before returning. */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4064
/* Pulse the RLC soft-reset bit in GRBM_SOFT_RESET, with a short
 * settle delay on both edges. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4073
/* Start the RLC.  On dGPUs the GUI idle interrupt is re-enabled here;
 * on APUs it is enabled later, after the CP has been initialized. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4084
/* Legacy (direct) load of the RLC GPM microcode: stream the image one
 * dword at a time through RLC_GPM_UCODE_ADDR/DATA.
 * Returns 0 on success, -EINVAL if no RLC firmware was fetched. */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* payload starts at the offset recorded in the ucode header */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	/* reset the write pointer, stream the image, then leave the fw
	 * version in the address register */
	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
4108
/* Bring the RLC up: stop it, disable CGCG/CGLS clock gating, disable
 * power gating, soft-reset it, (re)program power gating state, load
 * the microcode if we are doing direct (legacy) firmware loading, and
 * finally start it.
 * Returns 0 on success or the microcode load error. */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		/* polaris also has 3D CGCG/CGLS enables in the low two
		 * bits of the _3D variant of the register */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);


	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4147
4148 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4149 {
4150         int i;
4151         u32 tmp = RREG32(mmCP_ME_CNTL);
4152
4153         if (enable) {
4154                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4155                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4156                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4157         } else {
4158                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4159                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4160                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4161                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4162                         adev->gfx.gfx_ring[i].ready = false;
4163         }
4164         WREG32(mmCP_ME_CNTL, tmp);
4165         udelay(50);
4166 }
4167
4168 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4169 {
4170         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4171         const struct gfx_firmware_header_v1_0 *ce_hdr;
4172         const struct gfx_firmware_header_v1_0 *me_hdr;
4173         const __le32 *fw_data;
4174         unsigned i, fw_size;
4175
4176         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4177                 return -EINVAL;
4178
4179         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4180                 adev->gfx.pfp_fw->data;
4181         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4182                 adev->gfx.ce_fw->data;
4183         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4184                 adev->gfx.me_fw->data;
4185
4186         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4187         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4188         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4189
4190         gfx_v8_0_cp_gfx_enable(adev, false);
4191
4192         /* PFP */
4193         fw_data = (const __le32 *)
4194                 (adev->gfx.pfp_fw->data +
4195                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4196         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4197         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4198         for (i = 0; i < fw_size; i++)
4199                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4200         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4201
4202         /* CE */
4203         fw_data = (const __le32 *)
4204                 (adev->gfx.ce_fw->data +
4205                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4206         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4207         WREG32(mmCP_CE_UCODE_ADDR, 0);
4208         for (i = 0; i < fw_size; i++)
4209                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4210         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4211
4212         /* ME */
4213         fw_data = (const __le32 *)
4214                 (adev->gfx.me_fw->data +
4215                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4216         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4217         WREG32(mmCP_ME_RAM_WADDR, 0);
4218         for (i = 0; i < fw_size; i++)
4219                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4220         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4221
4222         return 0;
4223 }
4224
4225 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4226 {
4227         u32 count = 0;
4228         const struct cs_section_def *sect = NULL;
4229         const struct cs_extent_def *ext = NULL;
4230
4231         /* begin clear state */
4232         count += 2;
4233         /* context control state */
4234         count += 3;
4235
4236         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4237                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4238                         if (sect->id == SECT_CONTEXT)
4239                                 count += 2 + ext->reg_count;
4240                         else
4241                                 return 0;
4242                 }
4243         }
4244         /* pa_sc_raster_config/pa_sc_raster_config1 */
4245         count += 4;
4246         /* end clear state */
4247         count += 2;
4248         /* clear state */
4249         count += 2;
4250
4251         return count;
4252 }
4253
/* Initialize the gfx CP and emit the clear-state sequence on gfx ring
 * 0: preamble begin, context control, the SECT_CONTEXT extents from
 * vi_cs_data, per-ASIC raster config, preamble end, CLEAR_STATE and the
 * CE partition bases.  The packet count must match
 * gfx_v8_0_get_csb_size() (+4 for the SET_BASE packet).
 * Returns 0 on success or a ring allocation error. */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* topaz raster config depends on the RB count */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/* Configure the CP gfx (CPG) doorbell for @ring: program the doorbell
 * offset/enable in CP_RB_DOORBELL_CONTROL, and on dGPUs also set up the
 * doorbell aperture range. */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* the doorbell range registers below are dGPU-only */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4379
/* Bring up gfx ring 0: program the ring buffer size/pointers, the
 * rptr/wptr writeback addresses, the ring base, the doorbell, and then
 * start the ring and run a ring test.
 * Returns 0 on success or the ring test error. */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers.
	 * RB_RPTR_WR_ENA is set temporarily so the rptr can be reset. */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* restore CNTL without RB_RPTR_WR_ENA */
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4437
4438 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4439 {
4440         int i;
4441
4442         if (enable) {
4443                 WREG32(mmCP_MEC_CNTL, 0);
4444         } else {
4445                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4446                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4447                         adev->gfx.compute_ring[i].ready = false;
4448                 adev->gfx.kiq.ring.ready = false;
4449         }
4450         udelay(50);
4451 }
4452
/* Direct (legacy) load of the MEC1 microcode, and of MEC2 when a
 * separate image exists.  The MEC engines are halted first.
 * Returns 0 on success, -EINVAL if the MEC1 image is missing. */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4498
4499 /* KIQ functions */
/* KIQ functions */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue: low byte encodes me/pipe/queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	/* second write with bit 7 set - NOTE(review): presumably an
	 * activate/valid bit that must be raised after the queue id is
	 * latched; confirm against RLC documentation */
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4513
/* Enable all compute queues through the KIQ: emit a SET_RESOURCES
 * packet carrying the queue mask, then one MAP_QUEUES packet per
 * compute ring, and finally a scratch-register write that is polled to
 * confirm the KIQ processed the whole sequence.
 * Returns 0 on success, -EINVAL on timeout, or a scratch/ring error. */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the bitmask of usable MEC queues */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	/* 8 dwords per MAP_QUEUES + 8 for SET_RESOURCES + 3 for the
	 * scratch write */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	/* poll the scratch register until the KIQ echoes the marker */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
4599
/* Deactivate the currently selected HQD (the queue must already be
 * selected via srbm): issue a dequeue request of type @req if the
 * queue is active, wait for it to drain, then clear the request and
 * the queue read/write pointers.
 * Returns 0 on success, -ETIMEDOUT if the queue stayed active. */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	/* clear the request and reset the queue pointers regardless */
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4620
/* Fill in the memory queue descriptor (MQD) for a compute ring: EOP
 * buffer, doorbell, queue base/size, writeback addresses and the
 * default HQD register values read back from hardware.  The MQD is
 * later committed to the HQD registers (or consumed by the KIQ).
 * Always returns 0. */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all SEs/CUs for static thread management */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* the dynamic CU mask lives alongside the MQD in the same BO */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	/* EOP base is in units of 256 bytes */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot the remaining HQD registers as-is */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4763
/* Write a fully initialized MQD into the hardware queue (HQD) registers.
 * Callers in this file select the target me/pipe/queue with
 * vi_srbm_select() under srbm_mutex before calling, so the WREG32s below
 * land on the intended queue.  CP_HQD_ACTIVE is covered by the last loop,
 * which is what actually activates the queue.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
                        struct vi_mqd *mqd)
{
        uint32_t mqd_reg;
        uint32_t *mqd_data;

        /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
        mqd_data = &mqd->cp_mqd_base_addr_lo;

        /* disable wptr polling */
        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

        /* program all HQD registers */
        for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
         * This is safe since EOP RPTR==WPTR for any inactive HQD
         * on ASICs that do not support context-save.
         * EOP writes/reads can start anywhere in the ring.
         */
        if (adev->asic_type != CHIP_TONGA) {
                WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
                WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
                WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
        }

        /* program the registers after the EOP RPTR/WPTR block */
        for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* activate the HQD (CP_HQD_ACTIVE is the last register written) */
        for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        return 0;
}
4800
/* Initialize (or, after an SR-IOV reset, restore) the KIQ ring's MQD and
 * commit it to the hardware.  The KIQ's MQD backup slot is at index
 * AMDGPU_MAX_COMPUTE_RINGS, just past the per-compute-ring backups.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

        gfx_v8_0_kiq_setting(ring);

        if (adev->in_sriov_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
                /* commit the restored MQD with the KIQ's queue selected;
                 * srbm_mutex serializes the SRBM queue selection */
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);
        } else {
                /* first-time init: build the MQD from scratch */
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                /* keep a copy so a later reset can restore the queue */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        }

        return 0;
}
4839
/* Initialize one compute ring's (KCQ's) MQD.  Unlike the KIQ path this
 * only builds/restores the MQD; the queue is mapped later via the KIQ
 * (gfx_v8_0_kiq_kcq_enable()), not committed directly here.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = ring - &adev->gfx.compute_ring[0];

        if (!adev->in_sriov_reset && !adev->gfx.in_suspend) {
                /* cold init: build the MQD from scratch and back it up */
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        } else if (adev->in_sriov_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
        } else {
                /* resume from suspend: MQD is intact, just clear the ring */
                amdgpu_ring_clear_ring(ring);
        }
        return 0;
}
4870
/* Program the MEC doorbell aperture (on ASICs newer than Tonga) and
 * enable doorbell handling in the CP.  The range covers the KIQ doorbell
 * through the last MEC ring doorbell; the enable is written last.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
        if (adev->asic_type > CHIP_TONGA) {
                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
        }
        /* enable doorbells */
        WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4880
/* Bring up the KIQ and all compute (KCQ) rings: enable the compute CP,
 * init/restore each ring's MQD, set the MEC doorbell aperture, map the
 * KCQs through the KIQ, then ring-test everything.  Returns 0 on success
 * or the first error encountered.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = NULL;
        int r = 0, i;

        gfx_v8_0_cp_compute_enable(adev, true);

        /* the KIQ must come up first; it maps the other queues */
        ring = &adev->gfx.kiq.ring;

        r = amdgpu_bo_reserve(ring->mqd_obj, false);
        if (unlikely(r != 0))
                goto done;

        /* map the MQD BO, initialize it, then drop the mapping again */
        r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
        if (!r) {
                r = gfx_v8_0_kiq_init_queue(ring);
                amdgpu_bo_kunmap(ring->mqd_obj);
                ring->mqd_ptr = NULL;
        }
        amdgpu_bo_unreserve(ring->mqd_obj);
        if (r)
                goto done;

        /* same MQD init dance for every compute ring */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                ring = &adev->gfx.compute_ring[i];

                r = amdgpu_bo_reserve(ring->mqd_obj, false);
                if (unlikely(r != 0))
                        goto done;
                r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
                if (!r) {
                        r = gfx_v8_0_kcq_init_queue(ring);
                        amdgpu_bo_kunmap(ring->mqd_obj);
                        ring->mqd_ptr = NULL;
                }
                amdgpu_bo_unreserve(ring->mqd_obj);
                if (r)
                        goto done;
        }

        gfx_v8_0_set_mec_doorbell_range(adev);

        /* map all KCQs onto the hardware via KIQ packets */
        r = gfx_v8_0_kiq_kcq_enable(adev);
        if (r)
                goto done;

        /* Test KIQ */
        ring = &adev->gfx.kiq.ring;
        ring->ready = true;
        r = amdgpu_ring_test_ring(ring);
        if (r) {
                ring->ready = false;
                goto done;
        }

        /* Test KCQs; a failed KCQ is marked not-ready but is not fatal */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                ring = &adev->gfx.compute_ring[i];
                ring->ready = true;
                r = amdgpu_ring_test_ring(ring);
                if (r)
                        ring->ready = false;
        }

done:
        return r;
}
4948
4949 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4950 {
4951         int r;
4952
4953         if (!(adev->flags & AMD_IS_APU))
4954                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4955
4956         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4957                         /* legacy firmware loading */
4958                 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4959                 if (r)
4960                         return r;
4961
4962                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4963                 if (r)
4964                         return r;
4965         }
4966
4967         r = gfx_v8_0_cp_gfx_resume(adev);
4968         if (r)
4969                 return r;
4970
4971         r = gfx_v8_0_kiq_resume(adev);
4972         if (r)
4973                 return r;
4974
4975         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4976
4977         return 0;
4978 }
4979
/* Enable or disable both command processor engines (gfx and compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
        gfx_v8_0_cp_gfx_enable(adev, enable);
        gfx_v8_0_cp_compute_enable(adev, enable);
}
4985
/* IP-block hw_init hook: program golden registers, initialize the gfx
 * core, then bring up the RLC and the command processors in that order.
 */
static int gfx_v8_0_hw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        gfx_v8_0_init_golden_registers(adev);
        gfx_v8_0_gpu_init(adev);

        r = gfx_v8_0_rlc_resume(adev);
        if (r)
                return r;

        return gfx_v8_0_cp_resume(adev);
}
5002
/* Ask the KIQ to unmap (disable) one compute queue.
 * Emits an UNMAP_QUEUES packet followed by a SET_UCONFIG_REG write to a
 * scratch register, then polls that register until the KIQ has consumed
 * the packets.  Returns 0 on success, -EINVAL on timeout, or a negative
 * error if the scratch register / ring space could not be obtained.
 */
static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = kiq_ring->adev;
        uint32_t scratch, tmp = 0;
        int r, i;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("Failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);

        /* 9 dwords are emitted below (6 UNMAP_QUEUES + 3 SET_UCONFIG_REG) */
        r = amdgpu_ring_alloc(kiq_ring, 10);
        if (r) {
                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }

        /* unmap queues */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
        amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
                                                PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
                                                PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                                                PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
                                                PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
        amdgpu_ring_write(kiq_ring, 0);
        amdgpu_ring_write(kiq_ring, 0);
        amdgpu_ring_write(kiq_ring, 0);
        /* write to scratch for completion */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
        amdgpu_ring_commit(kiq_ring);

        /* busy-wait for the KIQ to write the completion token */
        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i >= adev->usec_timeout) {
                DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}
5053
/* IP-block hw_fini hook: release the privileged-op interrupts, unmap all
 * compute queues through the KIQ, then (except under SR-IOV) stop the CP
 * and RLC and ungate gfx powergating.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;

        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

        /* disable KCQ to avoid CPC touch memory not valid anymore */
        /* NOTE(review): kcq_disable errors are ignored here; failures are
         * only reported via DRM_ERROR inside gfx_v8_0_kcq_disable() */
        for (i = 0; i < adev->gfx.num_compute_rings; i++)
                gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);

        /* SR-IOV virtual functions skip the remaining hardware teardown */
        if (amdgpu_sriov_vf(adev)) {
                pr_debug("For SRIOV client, shouldn't do anything.\n");
                return 0;
        }
        gfx_v8_0_cp_enable(adev, false);
        gfx_v8_0_rlc_stop(adev);

        amdgpu_set_powergating_state(adev,
                        AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

        return 0;
}
5078
5079 static int gfx_v8_0_suspend(void *handle)
5080 {
5081         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5082         adev->gfx.in_suspend = true;
5083         return gfx_v8_0_hw_fini(adev);
5084 }
5085
5086 static int gfx_v8_0_resume(void *handle)
5087 {
5088         int r;
5089         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5090
5091         r = gfx_v8_0_hw_init(adev);
5092         adev->gfx.in_suspend = false;
5093         return r;
5094 }
5095
5096 static bool gfx_v8_0_is_idle(void *handle)
5097 {
5098         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5099
5100         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5101                 return false;
5102         else
5103                 return true;
5104 }
5105
5106 static int gfx_v8_0_wait_for_idle(void *handle)
5107 {
5108         unsigned i;
5109         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5110
5111         for (i = 0; i < adev->usec_timeout; i++) {
5112                 if (gfx_v8_0_is_idle(handle))
5113                         return 0;
5114
5115                 udelay(1);
5116         }
5117         return -ETIMEDOUT;
5118 }
5119
/* Inspect the GRBM/SRBM status registers and latch whatever soft-reset
 * bits would be needed to recover into adev->gfx.grbm_soft_reset and
 * adev->gfx.srbm_soft_reset.  Returns true when a soft reset is needed;
 * gfx_v8_0_{pre_,post_,}soft_reset() consume the latched masks.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        /* GRBM_STATUS */
        tmp = RREG32(mmGRBM_STATUS);
        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
                /* any busy gfx pipeline stage -> reset CP+GFX (and GRBM) */
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        }

        /* GRBM_STATUS2 */
        tmp = RREG32(mmGRBM_STATUS2);
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

        /* any busy CP micro-engine (fetcher/compute/gfx) -> reset them all */
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPF, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPC, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPG, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
                                                SOFT_RESET_GRBM, 1);
        }

        /* SRBM_STATUS */
        tmp = RREG32(mmSRBM_STATUS);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

        /* latch (or clear) the masks for the reset callbacks */
        if (grbm_soft_reset || srbm_soft_reset) {
                adev->gfx.grbm_soft_reset = grbm_soft_reset;
                adev->gfx.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->gfx.grbm_soft_reset = 0;
                adev->gfx.srbm_soft_reset = 0;
                return false;
        }
}
5181
5182 static int gfx_v8_0_pre_soft_reset(void *handle)
5183 {
5184         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5185         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5186
5187         if ((!adev->gfx.grbm_soft_reset) &&
5188             (!adev->gfx.srbm_soft_reset))
5189                 return 0;
5190
5191         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5192         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5193
5194         /* stop the rlc */
5195         gfx_v8_0_rlc_stop(adev);
5196
5197         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5198             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5199                 /* Disable GFX parsing/prefetching */
5200                 gfx_v8_0_cp_gfx_enable(adev, false);
5201
5202         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5203             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5204             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5205             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5206                 int i;
5207
5208                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5209                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5210
5211                         mutex_lock(&adev->srbm_mutex);
5212                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5213                         gfx_v8_0_deactivate_hqd(adev, 2);
5214                         vi_srbm_select(adev, 0, 0, 0, 0);
5215                         mutex_unlock(&adev->srbm_mutex);
5216                 }
5217                 /* Disable MEC parsing/prefetching */
5218                 gfx_v8_0_cp_compute_enable(adev, false);
5219         }
5220
5221        return 0;
5222 }
5223
/* Apply the soft-reset bits latched by gfx_v8_0_check_soft_reset().
 * GMCON_DEBUG GFX_STALL/GFX_CLEAR are asserted around the reset pulses,
 * each pulse is set then cleared with a 50us hold, and every WREG32 is
 * followed by a read-back of the same register to post the write.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        /* stall and clear gfx traffic before pulsing the resets */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
                WREG32(mmGMCON_DEBUG, tmp);
                udelay(50);
        }

        if (grbm_soft_reset) {
                /* assert, hold 50us, then deassert the GRBM reset bits */
                tmp = RREG32(mmGRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~grbm_soft_reset;
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);
        }

        if (srbm_soft_reset) {
                /* same pulse sequence for the SRBM reset bits */
                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);
        }

        /* release the gfx stall/clear */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
                WREG32(mmGMCON_DEBUG, tmp);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        return 0;
}
5285
/* Bring the gfx block back up after a soft reset: resume the gfx CP,
 * deactivate any stale compute HQDs and re-run the KIQ/KCQ bring-up,
 * then restart the RLC.  Mirrors gfx_v8_0_pre_soft_reset().
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

        /* no reset was performed -> nothing to restore */
        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
                gfx_v8_0_cp_gfx_resume(adev);

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
                int i;

                /* ensure every compute HQD is inactive before re-init */
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                        mutex_lock(&adev->srbm_mutex);
                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                        gfx_v8_0_deactivate_hqd(adev, 2);
                        vi_srbm_select(adev, 0, 0, 0, 0);
                        mutex_unlock(&adev->srbm_mutex);
                }
                gfx_v8_0_kiq_resume(adev);
        }
        gfx_v8_0_rlc_start(adev);

        return 0;
}
5323
5324 /**
5325  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5326  *
5327  * @adev: amdgpu_device pointer
5328  *
5329  * Fetches a GPU clock counter snapshot.
5330  * Returns the 64 bit clock counter snapshot.
5331  */
5332 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5333 {
5334         uint64_t clock;
5335
5336         mutex_lock(&adev->gfx.gpu_clock_mutex);
5337         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5338         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5339                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5340         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5341         return clock;
5342 }
5343
/* Emit WRITE_DATA packets that program the per-VMID GDS, GWS and OA
 * allocation registers on a VM switch.  All sizes/bases arrive in bytes
 * (or raw units) and are converted to hardware units via the AMDGPU_*
 * shifts before being written.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
                                          uint32_t vmid,
                                          uint32_t gds_base, uint32_t gds_size,
                                          uint32_t gws_base, uint32_t gws_size,
                                          uint32_t oa_base, uint32_t oa_size)
{
        gds_base = gds_base >> AMDGPU_GDS_SHIFT;
        gds_size = gds_size >> AMDGPU_GDS_SHIFT;

        gws_base = gws_base >> AMDGPU_GWS_SHIFT;
        gws_size = gws_size >> AMDGPU_GWS_SHIFT;

        oa_base = oa_base >> AMDGPU_OA_SHIFT;
        oa_size = oa_size >> AMDGPU_OA_SHIFT;

        /* GDS Base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_base);

        /* GDS Size */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_size);

        /* GWS: base and size share one register */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

        /* OA: written as a contiguous bitmask of oa_size bits from oa_base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5391
5392 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5393 {
5394         WREG32(mmSQ_IND_INDEX,
5395                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5396                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5397                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5398                 (SQ_IND_INDEX__FORCE_READ_MASK));
5399         return RREG32(mmSQ_IND_DATA);
5400 }
5401
5402 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5403                            uint32_t wave, uint32_t thread,
5404                            uint32_t regno, uint32_t num, uint32_t *out)
5405 {
5406         WREG32(mmSQ_IND_INDEX,
5407                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5408                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5409                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5410                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5411                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5412                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5413         while (num--)
5414                 *(out++) = RREG32(mmSQ_IND_DATA);
5415 }
5416
5417 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5418 {
5419         /* type 0 wave data */
5420         dst[(*no_fields)++] = 0;
5421         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5422         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5423         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5424         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5425         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5426         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5427         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5428         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5429         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5430         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5431         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5432         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5433         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5434         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5435         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5436         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5437         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5438         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5439 }
5440
5441 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5442                                      uint32_t wave, uint32_t start,
5443                                      uint32_t size, uint32_t *dst)
5444 {
5445         wave_read_regs(
5446                 adev, simd, wave, 0,
5447                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5448 }
5449
5450
/* Debug/query callbacks exposed to common amdgpu code via adev->gfx.funcs */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
        .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
        .select_se_sh = &gfx_v8_0_select_se_sh,
        .read_wave_data = &gfx_v8_0_read_wave_data,
        .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5457
/* IP-block early_init hook: set the fixed gfx-v8 ring counts and install
 * the gfx/ring/irq/gds/rlc callback tables before any hardware access.
 */
static int gfx_v8_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
        adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
        adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
        gfx_v8_0_set_ring_funcs(adev);
        gfx_v8_0_set_irq_funcs(adev);
        gfx_v8_0_set_gds_init(adev);
        gfx_v8_0_set_rlc_funcs(adev);

        return 0;
}
5472
5473 static int gfx_v8_0_late_init(void *handle)
5474 {
5475         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5476         int r;
5477
5478         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5479         if (r)
5480                 return r;
5481
5482         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5483         if (r)
5484                 return r;
5485
5486         /* requires IBs so do in late init after IB pool is initialized */
5487         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5488         if (r)
5489                 return r;
5490
5491         amdgpu_set_powergating_state(adev,
5492                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5493
5494         return 0;
5495 }
5496
5497 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5498                                                        bool enable)
5499 {
5500         if ((adev->asic_type == CHIP_POLARIS11) ||
5501             (adev->asic_type == CHIP_POLARIS12))
5502                 /* Send msg to SMU via Powerplay */
5503                 amdgpu_set_powergating_state(adev,
5504                                              AMD_IP_BLOCK_TYPE_SMC,
5505                                              enable ?
5506                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5507
5508         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5509 }
5510
5511 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5512                                                         bool enable)
5513 {
5514         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5515 }
5516
5517 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5518                 bool enable)
5519 {
5520         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5521 }
5522
5523 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5524                                           bool enable)
5525 {
5526         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5527 }
5528
5529 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5530                                                 bool enable)
5531 {
5532         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5533
5534         /* Read any GFX register to wake up GFX. */
5535         if (!enable)
5536                 RREG32(mmDB_RENDER_CONTROL);
5537 }
5538
5539 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5540                                           bool enable)
5541 {
5542         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5543                 cz_enable_gfx_cg_power_gating(adev, true);
5544                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5545                         cz_enable_gfx_pipeline_power_gating(adev, true);
5546         } else {
5547                 cz_enable_gfx_cg_power_gating(adev, false);
5548                 cz_enable_gfx_pipeline_power_gating(adev, false);
5549         }
5550 }
5551
5552 static int gfx_v8_0_set_powergating_state(void *handle,
5553                                           enum amd_powergating_state state)
5554 {
5555         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5556         bool enable = (state == AMD_PG_STATE_GATE);
5557
5558         if (amdgpu_sriov_vf(adev))
5559                 return 0;
5560
5561         switch (adev->asic_type) {
5562         case CHIP_CARRIZO:
5563         case CHIP_STONEY:
5564
5565                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5566                         cz_enable_sck_slow_down_on_power_up(adev, true);
5567                         cz_enable_sck_slow_down_on_power_down(adev, true);
5568                 } else {
5569                         cz_enable_sck_slow_down_on_power_up(adev, false);
5570                         cz_enable_sck_slow_down_on_power_down(adev, false);
5571                 }
5572                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5573                         cz_enable_cp_power_gating(adev, true);
5574                 else
5575                         cz_enable_cp_power_gating(adev, false);
5576
5577                 cz_update_gfx_cg_power_gating(adev, enable);
5578
5579                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5580                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5581                 else
5582                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5583
5584                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5585                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5586                 else
5587                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5588                 break;
5589         case CHIP_POLARIS11:
5590         case CHIP_POLARIS12:
5591                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5592                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5593                 else
5594                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5595
5596                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5597                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5598                 else
5599                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5600
5601                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5602                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5603                 else
5604                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5605                 break;
5606         default:
5607                 break;
5608         }
5609
5610         return 0;
5611 }
5612
/*
 * gfx_v8_0_get_clockgating_state - report which GFX clockgating features
 * are currently active by probing live register state.
 * @handle: opaque IP-block handle (an amdgpu_device pointer)
 * @flags: AMD_CG_SUPPORT_GFX_* bits are OR'ed into this mask
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* NOTE(review): under SR-IOV *flags is cleared but the register
	 * probes below still run and may set bits — confirm whether an
	 * early return was intended here. */
	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGLG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS (same register read as CGCG above) */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS (RLC memory light sleep implies MGLS) */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS (CP memory light sleep implies MGLS) */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5654
/*
 * gfx_v8_0_send_serdes_cmd - issue a BPM command to all CUs over the RLC
 * serdes interface.
 * @adev: amdgpu device
 * @reg_addr: BPM register address (e.g. BPM_REG_MGCG_OVERRIDE at the
 *            call sites in this file)
 * @cmd: BPM command data (SET_BPM_SERDES_CMD / CLE_BPM_SERDES_CMD)
 *
 * Broadcasts to every SE/SH (0xffffffff selects) and every CU/non-CU
 * master, then programs RLC_SERDES_WR_CTRL with the command.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* select all SEs/SHs so the command reaches every CU */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		/* Stoney variant: BPM_DATA/REG_ADDR fields are not cleared */
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* 0xff BPM_ADDR = broadcast; cmd/reg_addr fill the data fields */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5695
5696 #define MSG_ENTER_RLC_SAFE_MODE     1
5697 #define MSG_EXIT_RLC_SAFE_MODE      0
5698 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5699 #define RLC_GPR_REG2__REQ__SHIFT 0
5700 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5701 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5702
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode before clockgating
 * registers are reprogrammed.
 *
 * No-op when the RLC F32 core is disabled or when neither CGCG nor MGCG
 * is supported.  Sets adev->gfx.rlc.in_safe_mode so the matching exit
 * call knows it must send the leave message.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* NOTE(review): the request is composed on top of the
		 * RLC_CNTL value read above, not a fresh RLC_SAFE_MODE
		 * read — presumably harmless but worth confirming. */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait until RLC reports both GFX clock and power are up */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait until the CMD field reads back 0 (request accepted) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5736
/*
 * iceland_exit_rlc_safe_mode - leave RLC safe mode (counterpart of
 * iceland_enter_rlc_safe_mode).  Only sends the leave message when the
 * enter path actually engaged safe mode (in_safe_mode flag).
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* MESSAGE field cleared = "exit" request */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* NOTE(review): this wait runs even when no request was sent
	 * above; it polls until the CMD field reads back 0. */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5761
/* RLC safe-mode enter/exit callbacks installed via gfx_v8_0_set_rlc_funcs. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5766
/*
 * gfx_v8_0_update_medium_grain_clock_gating - enable or disable MGCG,
 * MGLS and CGTS (tree-shade) clock gating.
 * @adev: amdgpu device
 * @enable: true to engage gating (subject to cg_flags), false to tear down
 *
 * The numbered comments below are the hardware programming sequence;
 * statement order is significant.  The whole sequence runs under RLC
 * safe mode.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			/* dGPUs additionally clear the GRBM override */
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			/* 0x2 / 0x96: SM mode and on-monitor-add tuning
			 * values — magic constants, TODO confirm meaning */
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5870
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - enable or disable CGCG and
 * CGLS coarse-grain clock gating.
 * @adev: amdgpu device
 * @enable: true to engage gating (subject to cg_flags), false to tear down
 *
 * The register/serdes sequence below is order-sensitive and runs under
 * RLC safe mode.  GUI idle interrupts are re-enabled on both paths (they
 * are needed for power gating).
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg (repeated reads are
		 * deliberate) */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5963 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5964                                             bool enable)
5965 {
5966         if (enable) {
5967                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5968                  * ===  MGCG + MGLS + TS(CG/LS) ===
5969                  */
5970                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5971                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5972         } else {
5973                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5974                  * ===  CGCG + CGLS ===
5975                  */
5976                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5977                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5978         }
5979         return 0;
5980 }
5981
5982 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5983                                           enum amd_clockgating_state state)
5984 {
5985         uint32_t msg_id, pp_state = 0;
5986         uint32_t pp_support_state = 0;
5987         void *pp_handle = adev->powerplay.pp_handle;
5988
5989         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5990                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5991                         pp_support_state = PP_STATE_SUPPORT_LS;
5992                         pp_state = PP_STATE_LS;
5993                 }
5994                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5995                         pp_support_state |= PP_STATE_SUPPORT_CG;
5996                         pp_state |= PP_STATE_CG;
5997                 }
5998                 if (state == AMD_CG_STATE_UNGATE)
5999                         pp_state = 0;
6000
6001                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6002                                 PP_BLOCK_GFX_CG,
6003                                 pp_support_state,
6004                                 pp_state);
6005                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6006         }
6007
6008         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6009                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6010                         pp_support_state = PP_STATE_SUPPORT_LS;
6011                         pp_state = PP_STATE_LS;
6012                 }
6013
6014                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6015                         pp_support_state |= PP_STATE_SUPPORT_CG;
6016                         pp_state |= PP_STATE_CG;
6017                 }
6018
6019                 if (state == AMD_CG_STATE_UNGATE)
6020                         pp_state = 0;
6021
6022                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6023                                 PP_BLOCK_GFX_MG,
6024                                 pp_support_state,
6025                                 pp_state);
6026                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6027         }
6028
6029         return 0;
6030 }
6031
6032 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6033                                           enum amd_clockgating_state state)
6034 {
6035
6036         uint32_t msg_id, pp_state = 0;
6037         uint32_t pp_support_state = 0;
6038         void *pp_handle = adev->powerplay.pp_handle;
6039
6040         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6041                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6042                         pp_support_state = PP_STATE_SUPPORT_LS;
6043                         pp_state = PP_STATE_LS;
6044                 }
6045                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6046                         pp_support_state |= PP_STATE_SUPPORT_CG;
6047                         pp_state |= PP_STATE_CG;
6048                 }
6049                 if (state == AMD_CG_STATE_UNGATE)
6050                         pp_state = 0;
6051
6052                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6053                                 PP_BLOCK_GFX_CG,
6054                                 pp_support_state,
6055                                 pp_state);
6056                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6057         }
6058
6059         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6060                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6061                         pp_support_state = PP_STATE_SUPPORT_LS;
6062                         pp_state = PP_STATE_LS;
6063                 }
6064                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6065                         pp_support_state |= PP_STATE_SUPPORT_CG;
6066                         pp_state |= PP_STATE_CG;
6067                 }
6068                 if (state == AMD_CG_STATE_UNGATE)
6069                         pp_state = 0;
6070
6071                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6072                                 PP_BLOCK_GFX_3D,
6073                                 pp_support_state,
6074                                 pp_state);
6075                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6076         }
6077
6078         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6079                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6080                         pp_support_state = PP_STATE_SUPPORT_LS;
6081                         pp_state = PP_STATE_LS;
6082                 }
6083
6084                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6085                         pp_support_state |= PP_STATE_SUPPORT_CG;
6086                         pp_state |= PP_STATE_CG;
6087                 }
6088
6089                 if (state == AMD_CG_STATE_UNGATE)
6090                         pp_state = 0;
6091
6092                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6093                                 PP_BLOCK_GFX_MG,
6094                                 pp_support_state,
6095                                 pp_state);
6096                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6097         }
6098
6099         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6100                 pp_support_state = PP_STATE_SUPPORT_LS;
6101
6102                 if (state == AMD_CG_STATE_UNGATE)
6103                         pp_state = 0;
6104                 else
6105                         pp_state = PP_STATE_LS;
6106
6107                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6108                                 PP_BLOCK_GFX_RLC,
6109                                 pp_support_state,
6110                                 pp_state);
6111                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6112         }
6113
6114         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6115                 pp_support_state = PP_STATE_SUPPORT_LS;
6116
6117                 if (state == AMD_CG_STATE_UNGATE)
6118                         pp_state = 0;
6119                 else
6120                         pp_state = PP_STATE_LS;
6121                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6122                         PP_BLOCK_GFX_CP,
6123                         pp_support_state,
6124                         pp_state);
6125                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6126         }
6127
6128         return 0;
6129 }
6130
6131 static int gfx_v8_0_set_clockgating_state(void *handle,
6132                                           enum amd_clockgating_state state)
6133 {
6134         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6135
6136         if (amdgpu_sriov_vf(adev))
6137                 return 0;
6138
6139         switch (adev->asic_type) {
6140         case CHIP_FIJI:
6141         case CHIP_CARRIZO:
6142         case CHIP_STONEY:
6143                 gfx_v8_0_update_gfx_clock_gating(adev,
6144                                                  state == AMD_CG_STATE_GATE);
6145                 break;
6146         case CHIP_TONGA:
6147                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6148                 break;
6149         case CHIP_POLARIS10:
6150         case CHIP_POLARIS11:
6151         case CHIP_POLARIS12:
6152                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6153                 break;
6154         default:
6155                 break;
6156         }
6157         return 0;
6158 }
6159
6160 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6161 {
6162         return ring->adev->wb.wb[ring->rptr_offs];
6163 }
6164
6165 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6166 {
6167         struct amdgpu_device *adev = ring->adev;
6168
6169         if (ring->use_doorbell)
6170                 /* XXX check if swapping is necessary on BE */
6171                 return ring->adev->wb.wb[ring->wptr_offs];
6172         else
6173                 return RREG32(mmCP_RB0_WPTR);
6174 }
6175
6176 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6177 {
6178         struct amdgpu_device *adev = ring->adev;
6179
6180         if (ring->use_doorbell) {
6181                 /* XXX check if swapping is necessary on BE */
6182                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6183                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6184         } else {
6185                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6186                 (void)RREG32(mmCP_RB0_WPTR);
6187         }
6188 }
6189
/*
 * Emit a WAIT_REG_MEM packet that requests an HDP flush and waits for
 * the matching done bit in GPU_HDP_FLUSH_DONE.  Compute/KIQ rings use a
 * per-pipe done bit selected from ring->me/pipe; the gfx ring uses the
 * CP0 bit and waits on the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		/* each MEC pipe has its own done bit, offset by ring->pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* only MEC1/MEC2 exist; anything else is a bad ring */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6222
/*
 * Emit two EVENT_WRITE packets: a VS_PARTIAL_FLUSH followed by a
 * VGT_FLUSH.  Used on context switch (see gfx_v8_ring_emit_cntxcntl).
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6233
6234
/*
 * Emit a WRITE_DATA packet that writes 1 to mmHDP_DEBUG0.  Per the
 * function's role as the hdp_invalidate callback, this register write
 * is the VI-era mechanism to invalidate the HDP cache.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | /* ME */
				 WRITE_DATA_DST_SEL(0) |    /* register */
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0); /* high bits of register address */
	amdgpu_ring_write(ring, 1); /* value */

}
6246
/*
 * Emit an indirect-buffer packet on the gfx ring.  CE IBs use
 * INDIRECT_BUFFER_CONST; under SR-IOV, preemptible DE IBs get the
 * preemption-enable bit and are preceded by a de-meta packet.
 * @vm_id is placed in bits 31:24 of the control dword.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length in dwords plus the VMID */
	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		/* de-meta only for DE IBs, not the constant engine */
		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6276
/*
 * Emit an indirect-buffer packet on a compute ring.  The control dword
 * carries INDIRECT_BUFFER_VALID, the IB length in dwords, and @vm_id in
 * bits 31:24.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6292
/*
 * Emit a gfx-ring fence via EVENT_WRITE_EOP: flush TC/TCL1 caches, then
 * write @seq (32- or 64-bit, per AMDGPU_FENCE_FLAG_64BIT) to @addr and
 * optionally raise an interrupt (AMDGPU_FENCE_FLAG_INT).
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6313
/*
 * Stall the ring until the fence memory equals the last synced sequence
 * number, using WAIT_REG_MEM in memory space.  Gfx rings wait on the
 * PFP engine, compute rings on ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask: all bits */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6330
/*
 * Flush the GPU TLB for @vm_id: program the VM context's page-directory
 * base register with @pd_addr, request a TLB invalidate for that VMID,
 * wait behind the request, and on gfx rings resync PFP with ME so the
 * prefetcher does not read stale translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12); /* base address in 4K pages */

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6377
6378 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6379 {
6380         return ring->adev->wb.wb[ring->wptr_offs];
6381 }
6382
6383 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6384 {
6385         struct amdgpu_device *adev = ring->adev;
6386
6387         /* XXX check if swapping is necessary on BE */
6388         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6389         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6390 }
6391
/*
 * Emit a compute-ring fence via RELEASE_MEM: flush TC/TCL1 caches, then
 * write @seq (32- or 64-bit per @flags) to @addr, optionally raising an
 * interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6412
/*
 * Emit a KIQ fence: write the 32-bit @seq to @addr via WRITE_DATA, and
 * if AMDGPU_FENCE_FLAG_INT is set, write CPC_INT_STATUS to trigger the
 * interrupt.  64-bit sequence writeback is not supported on the KIQ.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6437
/* Emit a single SWITCH_BUFFER packet; the payload dword is unused. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6443
/*
 * Emit a CONTEXT_CONTROL packet.  The dw2 bits select which state
 * groups the CP reloads: on a context switch everything (global config,
 * sh regs, per-context state) is reloaded after a VGT flush; otherwise
 * only CE RAM is reloaded, and only on the first preamble IB.  Under
 * SR-IOV a ce-meta packet precedes the context control.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6476
/*
 * Open a conditional-execution region: emit COND_EXEC pointing at the
 * ring's cond_exe GPU address with a placeholder dword count, and
 * return the ring offset of that placeholder so
 * gfx_v8_0_ring_emit_patch_cond_exec() can fill in the real count.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6489
/*
 * Close a conditional-execution region: overwrite the 0x55aa55aa
 * placeholder at @offset with the number of dwords emitted since
 * gfx_v8_0_ring_emit_init_cond_exec(), accounting for ring-buffer
 * wrap-around when the current position is behind the placeholder.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	/* cur = index of the last dword written so far */
	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped past the end of the ring since init_cond_exec */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6503
/*
 * Read register @reg through the CP: emit COPY_DATA from the register
 * into the writeback slot reserved for virtualization register reads
 * (adev->virt.reg_val_offs).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6519
/* Write @val to register @reg through a WRITE_DATA packet. */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				  uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0); /* high bits of register address */
	amdgpu_ring_write(ring, val);
}
6529
6530 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6531                                                  enum amdgpu_interrupt_state state)
6532 {
6533         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6534                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6535 }
6536
6537 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6538                                                      int me, int pipe,
6539                                                      enum amdgpu_interrupt_state state)
6540 {
6541         u32 mec_int_cntl, mec_int_cntl_reg;
6542
6543         /*
6544          * amdgpu controls only the first MEC. That's why this function only
6545          * handles the setting of interrupts for this specific MEC. All other
6546          * pipes' interrupts are set by amdkfd.
6547          */
6548
6549         if (me == 1) {
6550                 switch (pipe) {
6551                 case 0:
6552                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6553                         break;
6554                 case 1:
6555                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6556                         break;
6557                 case 2:
6558                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6559                         break;
6560                 case 3:
6561                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6562                         break;
6563                 default:
6564                         DRM_DEBUG("invalid pipe %d\n", pipe);
6565                         return;
6566                 }
6567         } else {
6568                 DRM_DEBUG("invalid me %d\n", me);
6569                 return;
6570         }
6571
6572         switch (state) {
6573         case AMDGPU_IRQ_STATE_DISABLE:
6574                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6575                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6576                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6577                 break;
6578         case AMDGPU_IRQ_STATE_ENABLE:
6579                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6580                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6581                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6582                 break;
6583         default:
6584                 break;
6585         }
6586 }
6587
6588 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6589                                              struct amdgpu_irq_src *source,
6590                                              unsigned type,
6591                                              enum amdgpu_interrupt_state state)
6592 {
6593         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6594                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6595
6596         return 0;
6597 }
6598
6599 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6600                                               struct amdgpu_irq_src *source,
6601                                               unsigned type,
6602                                               enum amdgpu_interrupt_state state)
6603 {
6604         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6605                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6606
6607         return 0;
6608 }
6609
/*
 * Dispatch an EOP interrupt enable/disable: gfx goes to the ring-0
 * controls, compute types map to their (me, pipe) pair.  The explicit
 * table avoids relying on the layout of the AMDGPU_CP_IRQ_* enum.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6648
/*
 * End-of-pipe interrupt handler: decode me/pipe/queue from the IH
 * ring_id and run fence processing on the matching ring.  me 0 is the
 * gfx ring; me 1/2 are scanned against all compute rings.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id layout: [6:4] queue, [3:2] me, [1:0] pipe */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
			  */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6680
/* Privileged-register fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6689
/* Privileged-instruction fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6698
/*
 * Enable/disable the GENERIC2 interrupt used by the KIQ, in both the
 * global CPC_INT_CNTL and the per-pipe INT_CNTL register of the pipe
 * the KIQ ring runs on (ME1 or ME2).
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
6727
/*
 * KIQ GENERIC2 interrupt handler: decode me/pipe/queue for debugging
 * and process fences on the KIQ ring.
 */
static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	/* same ring_id layout as gfx_v8_0_eop_irq() */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		   me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}
6744
/* IP-block callbacks for the gfx v8 block. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6765
/* Ring callbacks for the gfx engine ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
6809
/* Ring callbacks for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6838
/* Ring callbacks for the kernel interface queue (KIQ). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6864
6865 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6866 {
6867         int i;
6868
6869         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6870
6871         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6872                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6873
6874         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6875                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6876 }
6877
/* End-of-pipe (EOP) interrupt source handlers. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6882
/* Privileged register access fault interrupt source handlers. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6887
/* Privileged instruction fault interrupt source handlers. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6892
/* KIQ ring interrupt source handlers. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
6897
6898 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6899 {
6900         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6901         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6902
6903         adev->gfx.priv_reg_irq.num_types = 1;
6904         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6905
6906         adev->gfx.priv_inst_irq.num_types = 1;
6907         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6908
6909         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
6910         adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
6911 }
6912
/* All gfx v8 variants share one RLC function table, named after the
 * first ASIC it was written for (Iceland/Topaz). */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
6917
6918 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6919 {
6920         /* init asci gds info */
6921         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6922         adev->gds.gws.total_size = 64;
6923         adev->gds.oa.total_size = 16;
6924
6925         if (adev->gds.mem.total_size == 64 * 1024) {
6926                 adev->gds.mem.gfx_partition_size = 4096;
6927                 adev->gds.mem.cs_partition_size = 4096;
6928
6929                 adev->gds.gws.gfx_partition_size = 4;
6930                 adev->gds.gws.cs_partition_size = 4;
6931
6932                 adev->gds.oa.gfx_partition_size = 4;
6933                 adev->gds.oa.cs_partition_size = 1;
6934         } else {
6935                 adev->gds.mem.gfx_partition_size = 1024;
6936                 adev->gds.mem.cs_partition_size = 1024;
6937
6938                 adev->gds.gws.gfx_partition_size = 16;
6939                 adev->gds.gws.cs_partition_size = 16;
6940
6941                 adev->gds.oa.gfx_partition_size = 4;
6942                 adev->gds.oa.cs_partition_size = 4;
6943         }
6944 }
6945
6946 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6947                                                  u32 bitmap)
6948 {
6949         u32 data;
6950
6951         if (!bitmap)
6952                 return;
6953
6954         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6955         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6956
6957         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6958 }
6959
6960 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6961 {
6962         u32 data, mask;
6963
6964         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6965                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6966
6967         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6968
6969         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6970 }
6971
/*
 * Populate adev->gfx.cu_info by walking every shader engine / shader
 * array, reading the per-array active-CU bitmap from the hardware and
 * counting active CUs.  Also builds the "AO" (presumably always-on —
 * TODO confirm) CU bitmaps, capped at ao_cu_num CUs per array.
 * GRBM per-SE/SH register access is serialized via grbm_idx_mutex.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	/* user CU-disable masks for up to 4 SEs x 2 SHs */
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the AO count at 2 CUs; dGPUs allow all CUs in the array */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* select this SE/SH so the register reads/writes
			 * below target the right shader array */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; the first ao_cu_num of them
			 * also go into the AO bitmap */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* NOTE(review): ao_cu_mask packs only SEs 0-1
			 * (8 bits per array at i*16 + j*8) while the
			 * disable masks above cover SEs 0-3 — confirm the
			 * asymmetry is intentional */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast (all SE/SH) selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
7022
/* GFX 8.0 IP block descriptor, registered by the SoC setup code. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7031
/* GFX 8.1 IP block descriptor; shares the 8.0 function table. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7040
/*
 * Emit a WRITE_DATA packet that clears the CE metadata in the CSA
 * (clear-state area).  The CSA is addressed at a fixed offset from the
 * top of the reserved VA range (AMDGPU_VA_RESERVED_SIZE - 2 pages) —
 * NOTE(review): assumes the CSA was mapped there by VA setup; confirm.
 */
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	/* zero-initialized payload written to the CSA.
	 * NOTE(review): static local is shared across all calls/rings —
	 * harmless here since it is never modified, but not reentrant. */
	static union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	/* payload layout and WRITE_DATA dword count depend on whether
	 * chained IBs are in use (SR-IOV) */
	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
						  offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
						  offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	/* WRITE_DATA header: CE engine, destination = memory (dst_sel 8),
	 * wait for write confirmation */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
7069
/*
 * Emit a WRITE_DATA packet that initializes the DE metadata in the CSA
 * (clear-state area), recording the GDS backup address (the page right
 * after the CSA base) in the payload.  Same fixed-VA assumption as
 * gfx_v8_0_ring_emit_ce_meta — NOTE(review): confirm CSA mapping.
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	/* NOTE(review): static local is shared across calls/rings and is
	 * rewritten below on every call — not reentrant; safe only if a
	 * single thread emits on the gfx ring at a time. */
	static union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	/* payload layout and WRITE_DATA dword count depend on whether
	 * chained IBs are in use (SR-IOV) */
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	/* WRITE_DATA header: DE engine, destination = memory (dst_sel 8),
	 * wait for write confirmation */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}