linux.git: drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (drm-next, after backmerge of tag 'v4.12-rc7')
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

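/*
 * Helpers for composing GB_TILE_MODEn / GB_MACROTILE_MODEn register values:
 * each macro shifts a field value into its bit position within the register.
 */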
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

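/*
 * MODULE_FIRMWARE() records each firmware name in the module info so that
 * userspace tooling (e.g. initramfs generators) can bundle every file the
 * driver might request at runtime.
 */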
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

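/*
 * The "golden" register tables below are flat lists of u32 triplets:
 *   { register offset, AND mask, OR value }
 * amdgpu_program_register_sequence() applies each entry as a
 * read-modify-write (or a direct write when the mask is 0xffffffff).
 */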
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_program_register_sequence(adev,
                                                 iceland_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_iceland_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_program_register_sequence(adev,
                                                 iceland_golden_common_all,
                                                 (const u32)ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_program_register_sequence(adev,
                                                 fiji_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_fiji_a10,
                                                 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_program_register_sequence(adev,
                                                 fiji_golden_common_all,
                                                 (const u32)ARRAY_SIZE(fiji_golden_common_all));
                break;
        case CHIP_TONGA:
                amdgpu_program_register_sequence(adev,
                                                 tonga_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_tonga_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_program_register_sequence(adev,
                                                 tonga_golden_common_all,
                                                 (const u32)ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_polaris11_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_program_register_sequence(adev,
                                                 polaris11_golden_common_all,
                                                 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_polaris10_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_program_register_sequence(adev,
                                                 polaris10_golden_common_all,
                                                 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
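                /*
                 * Board-specific tuning for a few known Polaris10 SKUs,
                 * matched by PCI revision and subsystem IDs and applied
                 * through the atombios i2c engine.
                 */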
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_program_register_sequence(adev,
                                                 cz_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_common_all,
                                                 (const u32)ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_program_register_sequence(adev,
                                                 stoney_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_common_all,
                                                 (const u32)ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}

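/*
 * Set up the CP scratch register pool (SCRATCH_REG0..SCRATCH_REG6); the
 * ring and IB tests below use these registers as a known location the
 * command processor can write to.
 */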
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 7;
        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

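/*
 * Basic ring-level sanity test: seed a scratch register with 0xCAFEDEAD,
 * emit a SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll
 * the register until the value lands or the timeout expires.
 */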
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
                          ring->idx, r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < adev->usec_timeout) {
                DRM_INFO("ring test on %d succeeded in %d usecs\n",
                         ring->idx, i);
        } else {
                DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
                          ring->idx, scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

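/*
 * Same idea as the ring test, but the scratch write is carried in an
 * indirect buffer (IB) and completion is detected by waiting on the IB's
 * fence, which exercises the full submission path.
 */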
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        uint32_t scratch;
        uint32_t tmp = 0;
        long r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 256, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
                goto err1;
        }
        ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
        ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
        ib.ptr[2] = 0xDEADBEEF;
        ib.length_dw = 3;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                DRM_ERROR("amdgpu: IB test timed out.\n");
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
                goto err2;
        }
        tmp = RREG32(scratch);
        if (tmp == 0xDEADBEEF) {
                DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
                r = 0;
        } else {
                DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
                          scratch, tmp);
                r = -EINVAL;
        }
err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}
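/* Drop the firmware references taken in gfx_v8_0_init_microcode(). */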
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ))
                release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

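/*
 * Request and validate the PFP, ME, CE, RLC and MEC (plus MEC2 where
 * available) firmware images for the detected ASIC, cache the version and
 * feature data from their headers, and, when the SMU loads the firmware,
 * register each image in adev->firmware.ucode[].
 */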
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
        const char *chip_name;
        char fw_name[30];
        int err;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
        const struct gfx_firmware_header_v1_0 *cp_hdr;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        unsigned int *tmp = NULL, i;

        DRM_DEBUG("\n");

        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                chip_name = "topaz";
                break;
        case CHIP_TONGA:
                chip_name = "tonga";
                break;
        case CHIP_CARRIZO:
                chip_name = "carrizo";
                break;
        case CHIP_FIJI:
                chip_name = "fiji";
                break;
        case CHIP_POLARIS11:
                chip_name = "polaris11";
                break;
        case CHIP_POLARIS10:
                chip_name = "polaris10";
                break;
        case CHIP_POLARIS12:
                chip_name = "polaris12";
                break;
        case CHIP_STONEY:
                chip_name = "stoney";
                break;
        default:
                BUG();
        }

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.me_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        /*
         * Support for MCBP/Virtualization in combination with chained IBs was
         * formally released with feature version 46.
         */
        if (adev->gfx.ce_feature_version >= 46 &&
            adev->gfx.pfp_feature_version >= 46) {
                adev->virt.chained_ib_support = true;
                DRM_INFO("Chained IB support enabled!\n");
        } else {
                adev->virt.chained_ib_support = false;
        }

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

        adev->gfx.rlc.save_and_restore_offset =
                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
        adev->gfx.rlc.clear_state_descriptor_offset =
                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
        adev->gfx.rlc.avail_scratch_ram_locations =
                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
        adev->gfx.rlc.reg_restore_list_size =
                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
        adev->gfx.rlc.reg_list_format_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_start);
        adev->gfx.rlc.reg_list_format_separate_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
        adev->gfx.rlc.starting_offsets_start =
                        le32_to_cpu(rlc_hdr->starting_offsets_start);
        adev->gfx.rlc.reg_list_format_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
        adev->gfx.rlc.reg_list_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);

        adev->gfx.rlc.register_list_format =
                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
                                        adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

        if (!adev->gfx.rlc.register_list_format) {
                err = -ENOMEM;
                goto out;
        }

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
        for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
        for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ)) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                if (!err) {
                        err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
                        if (err)
                                goto out;
                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                                adev->gfx.mec2_fw->data;
                        adev->gfx.mec2_fw_version =
                                le32_to_cpu(cp_hdr->header.ucode_version);
                        adev->gfx.mec2_feature_version =
                                le32_to_cpu(cp_hdr->ucode_feature_version);
                } else {
                        err = 0;
                        adev->gfx.mec2_fw = NULL;
                }
        }

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
                info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
                info->fw = adev->gfx.pfp_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
                info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
                info->fw = adev->gfx.me_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
                info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
                info->fw = adev->gfx.ce_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
                info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
                info->fw = adev->gfx.rlc_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
                info->fw = adev->gfx.mec_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                /* we also need to account for the CP jump table (JT) */
                cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

                if (amdgpu_sriov_vf(adev)) {
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
                        info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
                        info->fw = adev->gfx.mec_fw;
                        adev->firmware.fw_size +=
                                ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
                }

                if (adev->gfx.mec2_fw) {
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
                        info->fw = adev->gfx.mec2_fw;
                        header = (const struct common_firmware_header *)info->fw->data;
                        adev->firmware.fw_size +=
                                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
                }
        }

out:
        if (err) {
                dev_err(adev->dev,
                        "gfx8: Failed to load firmware \"%s\"\n",
                        fw_name);
                release_firmware(adev->gfx.pfp_fw);
                adev->gfx.pfp_fw = NULL;
                release_firmware(adev->gfx.me_fw);
                adev->gfx.me_fw = NULL;
                release_firmware(adev->gfx.ce_fw);
                adev->gfx.ce_fw = NULL;
                release_firmware(adev->gfx.rlc_fw);
                adev->gfx.rlc_fw = NULL;
                release_firmware(adev->gfx.mec_fw);
                adev->gfx.mec_fw = NULL;
                release_firmware(adev->gfx.mec2_fw);
                adev->gfx.mec2_fw = NULL;
        }
        return err;
}

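/*
 * Build the clear-state buffer (CSB): a PM4 packet stream that restores the
 * default context register state, bracketed by PREAMBLE begin/end markers
 * and finished with a CLEAR_STATE packet.
 */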
1128 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1129                                     volatile u32 *buffer)
1130 {
1131         u32 count = 0, i;
1132         const struct cs_section_def *sect = NULL;
1133         const struct cs_extent_def *ext = NULL;
1134
1135         if (adev->gfx.rlc.cs_data == NULL)
1136                 return;
1137         if (buffer == NULL)
1138                 return;
1139
1140         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1141         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1142
1143         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1144         buffer[count++] = cpu_to_le32(0x80000000);
1145         buffer[count++] = cpu_to_le32(0x80000000);
1146
1147         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1148                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1149                         if (sect->id == SECT_CONTEXT) {
1150                                 buffer[count++] =
1151                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1152                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1153                                                 PACKET3_SET_CONTEXT_REG_START);
1154                                 for (i = 0; i < ext->reg_count; i++)
1155                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1156                         } else {
1157                                 return;
1158                         }
1159                 }
1160         }
1161
1162         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1163         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1164                         PACKET3_SET_CONTEXT_REG_START);
1165         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1166         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1167
1168         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1169         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1170
1171         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1172         buffer[count++] = cpu_to_le32(0);
1173 }
1174
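     /*
      * Copy the CP jump tables (JT) from the CE, PFP, ME and MEC microcode
      * images (plus MEC2 on Carrizo, where max_me is 5) into the RLC
      * cp_table BO.  The tables are packed back to back; bo_offset tracks
      * the running position in dwords.  Note this assumes mec2_fw loaded
      * successfully whenever max_me is 5.
      */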
1175 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1176 {
1177         const __le32 *fw_data;
1178         volatile u32 *dst_ptr;
1179         int me, i, max_me = 4;
1180         u32 bo_offset = 0;
1181         u32 table_offset, table_size;
1182
1183         if (adev->asic_type == CHIP_CARRIZO)
1184                 max_me = 5;
1185
1186         /* write the cp table buffer */
1187         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1188         for (me = 0; me < max_me; me++) {
1189                 if (me == 0) {
1190                         const struct gfx_firmware_header_v1_0 *hdr =
1191                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1192                         fw_data = (const __le32 *)
1193                                 (adev->gfx.ce_fw->data +
1194                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1195                         table_offset = le32_to_cpu(hdr->jt_offset);
1196                         table_size = le32_to_cpu(hdr->jt_size);
1197                 } else if (me == 1) {
1198                         const struct gfx_firmware_header_v1_0 *hdr =
1199                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1200                         fw_data = (const __le32 *)
1201                                 (adev->gfx.pfp_fw->data +
1202                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1203                         table_offset = le32_to_cpu(hdr->jt_offset);
1204                         table_size = le32_to_cpu(hdr->jt_size);
1205                 } else if (me == 2) {
1206                         const struct gfx_firmware_header_v1_0 *hdr =
1207                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1208                         fw_data = (const __le32 *)
1209                                 (adev->gfx.me_fw->data +
1210                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1211                         table_offset = le32_to_cpu(hdr->jt_offset);
1212                         table_size = le32_to_cpu(hdr->jt_size);
1213                 } else if (me == 3) {
1214                         const struct gfx_firmware_header_v1_0 *hdr =
1215                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1216                         fw_data = (const __le32 *)
1217                                 (adev->gfx.mec_fw->data +
1218                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1219                         table_offset = le32_to_cpu(hdr->jt_offset);
1220                         table_size = le32_to_cpu(hdr->jt_size);
1221                 } else if (me == 4) {
1222                         const struct gfx_firmware_header_v1_0 *hdr =
1223                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1224                         fw_data = (const __le32 *)
1225                                 (adev->gfx.mec2_fw->data +
1226                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1227                         table_offset = le32_to_cpu(hdr->jt_offset);
1228                         table_size = le32_to_cpu(hdr->jt_size);
1229                 }
1230
1231                 for (i = 0; i < table_size; i++) {
1232                         dst_ptr[bo_offset + i] =
1233                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1234                 }
1235
1236                 bo_offset += table_size;
1237         }
1238 }
1239
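     /* Tear down the RLC BOs (clear-state block and CP jump table):
      * reserve, unpin, unreserve and drop the last reference on each. */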
1240 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1241 {
1242         int r;
1243
1244         /* clear state block */
1245         if (adev->gfx.rlc.clear_state_obj) {
1246                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1247                 if (unlikely(r != 0))
1248                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1249                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1250                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1251                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1252                 adev->gfx.rlc.clear_state_obj = NULL;
1253         }
1254
1255         /* jump table block */
1256         if (adev->gfx.rlc.cp_table_obj) {
1257                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
1258                 if (unlikely(r != 0))
1259                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1260                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1261                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1262                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1263                 adev->gfx.rlc.cp_table_obj = NULL;
1264         }
1265 }
1266
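     /*
      * Create, pin and fill the RLC clear-state BO in CPU-accessible VRAM
      * (sized via gfx_v8_0_get_csb_size()).  On Carrizo and Stoney, also
      * create the CP jump-table BO: 5 MEs x 96 entries x 4 bytes aligned
      * to 2KB, plus 64KB for GDS, per the JT + GDS sizing below.
      */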
1267 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1268 {
1269         volatile u32 *dst_ptr;
1270         u32 dws;
1271         const struct cs_section_def *cs_data;
1272         int r;
1273
1274         adev->gfx.rlc.cs_data = vi_cs_data;
1275
1276         cs_data = adev->gfx.rlc.cs_data;
1277
1278         if (cs_data) {
1279                 /* clear state block */
1280                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1281
1282                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1283                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1284                                              AMDGPU_GEM_DOMAIN_VRAM,
1285                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1286                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1287                                              NULL, NULL,
1288                                              &adev->gfx.rlc.clear_state_obj);
1289                         if (r) {
1290                                 dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
1291                                 gfx_v8_0_rlc_fini(adev);
1292                                 return r;
1293                         }
1294                 }
1295                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1296                 if (unlikely(r != 0)) {
1297                         gfx_v8_0_rlc_fini(adev);
1298                         return r;
1299                 }
1300                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1301                                   &adev->gfx.rlc.clear_state_gpu_addr);
1302                 if (r) {
1303                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1304                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1305                         gfx_v8_0_rlc_fini(adev);
1306                         return r;
1307                 }
1308
1309                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1310                 if (r) {
1311                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1312                         gfx_v8_0_rlc_fini(adev);
1313                         return r;
1314                 }
1315                 /* set up the cs buffer */
1316                 dst_ptr = adev->gfx.rlc.cs_ptr;
1317                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1318                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1319                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1320         }
1321
1322         if ((adev->asic_type == CHIP_CARRIZO) ||
1323             (adev->asic_type == CHIP_STONEY)) {
1324                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1325                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1326                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1327                                              AMDGPU_GEM_DOMAIN_VRAM,
1328                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1329                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1330                                              NULL, NULL,
1331                                              &adev->gfx.rlc.cp_table_obj);
1332                         if (r) {
1333                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1334                                 return r;
1335                         }
1336                 }
1337
1338                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1339                 if (unlikely(r != 0)) {
1340                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1341                         return r;
1342                 }
1343                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1344                                   &adev->gfx.rlc.cp_table_gpu_addr);
1345                 if (r) {
1346                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1347                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1348                         return r;
1349                 }
1350                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1351                 if (r) {
1352                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1353                         return r;
1354                 }
1355
1356                 cz_init_cp_jump_table(adev);
1357
1358                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1359                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1360         }
1361
1362         return 0;
1363 }
1364
1365 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1366 {
1367         int r;
1368
1369         if (adev->gfx.mec.hpd_eop_obj) {
1370                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
1371                 if (unlikely(r != 0))
1372                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1373                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1374                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1375                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1376                 adev->gfx.mec.hpd_eop_obj = NULL;
1377         }
1378 }
1379
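     /*
      * Allocate the MEC HPD EOP buffer in GTT: one GFX8_MEC_HPD_SIZE-byte
      * slot per acquired compute ring, pinned, mapped and zeroed before
      * the queues are brought up.
      */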
1380 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1381 {
1382         int r;
1383         u32 *hpd;
1384         size_t mec_hpd_size;
1385
1386         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1387
1388         /* take ownership of the relevant compute queues */
1389         amdgpu_gfx_compute_queue_acquire(adev);
1390
1391         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1392
1393         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1394                 r = amdgpu_bo_create(adev,
1395                                      mec_hpd_size,
1396                                      PAGE_SIZE, true,
1397                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1398                                      &adev->gfx.mec.hpd_eop_obj);
1399                 if (r) {
1400                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1401                         return r;
1402                 }
1403         }
1404
1405         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1406         if (unlikely(r != 0)) {
1407                 gfx_v8_0_mec_fini(adev);
1408                 return r;
1409         }
1410         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1411                           &adev->gfx.mec.hpd_eop_gpu_addr);
1412         if (r) {
1413                 dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
1414                 gfx_v8_0_mec_fini(adev);
1415                 return r;
1416         }
1417         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1418         if (r) {
1419                 dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
1420                 gfx_v8_0_mec_fini(adev);
1421                 return r;
1422         }
1423
1424         memset(hpd, 0, mec_hpd_size);
1425
1426         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1427         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1428
1429         return 0;
1430 }
1431
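     /*
      * Raw GCN3 shader binaries for the EDC GPR workaround below.  These
      * appear to be straight runs of v_mov_b32/s_mov_b32 writes that touch
      * the GPRs, terminated by s_barrier (0xbf8a0000) and s_endpgm
      * (0xbf810000).
      */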
1432 static const u32 vgpr_init_compute_shader[] =
1433 {
1434         0x7e000209, 0x7e020208,
1435         0x7e040207, 0x7e060206,
1436         0x7e080205, 0x7e0a0204,
1437         0x7e0c0203, 0x7e0e0202,
1438         0x7e100201, 0x7e120200,
1439         0x7e140209, 0x7e160208,
1440         0x7e180207, 0x7e1a0206,
1441         0x7e1c0205, 0x7e1e0204,
1442         0x7e200203, 0x7e220202,
1443         0x7e240201, 0x7e260200,
1444         0x7e280209, 0x7e2a0208,
1445         0x7e2c0207, 0x7e2e0206,
1446         0x7e300205, 0x7e320204,
1447         0x7e340203, 0x7e360202,
1448         0x7e380201, 0x7e3a0200,
1449         0x7e3c0209, 0x7e3e0208,
1450         0x7e400207, 0x7e420206,
1451         0x7e440205, 0x7e460204,
1452         0x7e480203, 0x7e4a0202,
1453         0x7e4c0201, 0x7e4e0200,
1454         0x7e500209, 0x7e520208,
1455         0x7e540207, 0x7e560206,
1456         0x7e580205, 0x7e5a0204,
1457         0x7e5c0203, 0x7e5e0202,
1458         0x7e600201, 0x7e620200,
1459         0x7e640209, 0x7e660208,
1460         0x7e680207, 0x7e6a0206,
1461         0x7e6c0205, 0x7e6e0204,
1462         0x7e700203, 0x7e720202,
1463         0x7e740201, 0x7e760200,
1464         0x7e780209, 0x7e7a0208,
1465         0x7e7c0207, 0x7e7e0206,
1466         0xbf8a0000, 0xbf810000,
1467 };
1468
1469 static const u32 sgpr_init_compute_shader[] =
1470 {
1471         0xbe8a0100, 0xbe8c0102,
1472         0xbe8e0104, 0xbe900106,
1473         0xbe920108, 0xbe940100,
1474         0xbe960102, 0xbe980104,
1475         0xbe9a0106, 0xbe9c0108,
1476         0xbe9e0100, 0xbea00102,
1477         0xbea20104, 0xbea40106,
1478         0xbea60108, 0xbea80100,
1479         0xbeaa0102, 0xbeac0104,
1480         0xbeae0106, 0xbeb00108,
1481         0xbeb20100, 0xbeb40102,
1482         0xbeb60104, 0xbeb80106,
1483         0xbeba0108, 0xbebc0100,
1484         0xbebe0102, 0xbec00104,
1485         0xbec20106, 0xbec40108,
1486         0xbec60100, 0xbec80102,
1487         0xbee60004, 0xbee70005,
1488         0xbeea0006, 0xbeeb0007,
1489         0xbee80008, 0xbee90009,
1490         0xbefc0000, 0xbf8a0000,
1491         0xbf810000, 0x00000000,
1492 };
1493
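     /*
      * Dispatch register state for the workaround: one table for the VGPR
      * pass (all CUs enabled via 0xffffffff) and two for the SGPR passes.
      * The SGPR tables differ only in COMPUTE_STATIC_THREAD_MGMT_SE0
      * (0x0f vs 0xf0), which presumably steers each dispatch to a
      * different half of the CUs.
      */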
1494 static const u32 vgpr_init_regs[] =
1495 {
1496         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1497         mmCOMPUTE_RESOURCE_LIMITS, 0,
1498         mmCOMPUTE_NUM_THREAD_X, 256*4,
1499         mmCOMPUTE_NUM_THREAD_Y, 1,
1500         mmCOMPUTE_NUM_THREAD_Z, 1,
1501         mmCOMPUTE_PGM_RSRC2, 20,
1502         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1503         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1504         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1505         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1506         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1507         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1508         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1509         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1510         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1511         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1512 };
1513
1514 static const u32 sgpr1_init_regs[] =
1515 {
1516         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1517         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1518         mmCOMPUTE_NUM_THREAD_X, 256*5,
1519         mmCOMPUTE_NUM_THREAD_Y, 1,
1520         mmCOMPUTE_NUM_THREAD_Z, 1,
1521         mmCOMPUTE_PGM_RSRC2, 20,
1522         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1523         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1524         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1525         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1526         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1527         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1528         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1529         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1530         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1531         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1532 };
1533
1534 static const u32 sgpr2_init_regs[] =
1535 {
1536         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1537         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1538         mmCOMPUTE_NUM_THREAD_X, 256*5,
1539         mmCOMPUTE_NUM_THREAD_Y, 1,
1540         mmCOMPUTE_NUM_THREAD_Z, 1,
1541         mmCOMPUTE_PGM_RSRC2, 20,
1542         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1543         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1544         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1545         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1546         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1547         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1548         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1549         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1550         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1551         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1552 };
1553
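     /*
      * EDC SEC/DED (single-error-correct / double-error-detect) counter
      * registers; they are read back at the end of the workaround, which
      * clears the accumulated counts.
      */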
1554 static const u32 sec_ded_counter_registers[] =
1555 {
1556         mmCPC_EDC_ATC_CNT,
1557         mmCPC_EDC_SCRATCH_CNT,
1558         mmCPC_EDC_UCODE_CNT,
1559         mmCPF_EDC_ATC_CNT,
1560         mmCPF_EDC_ROQ_CNT,
1561         mmCPF_EDC_TAG_CNT,
1562         mmCPG_EDC_ATC_CNT,
1563         mmCPG_EDC_DMA_CNT,
1564         mmCPG_EDC_TAG_CNT,
1565         mmDC_EDC_CSINVOC_CNT,
1566         mmDC_EDC_RESTORE_CNT,
1567         mmDC_EDC_STATE_CNT,
1568         mmGDS_EDC_CNT,
1569         mmGDS_EDC_GRBM_CNT,
1570         mmGDS_EDC_OA_DED,
1571         mmSPI_EDC_CNT,
1572         mmSQC_ATC_EDC_GATCL1_CNT,
1573         mmSQC_EDC_CNT,
1574         mmSQ_EDC_DED_CNT,
1575         mmSQ_EDC_INFO,
1576         mmSQ_EDC_SEC_CNT,
1577         mmTCC_EDC_CNT,
1578         mmTCP_ATC_EDC_GATCL1_CNT,
1579         mmTCP_EDC_CNT,
1580         mmTD_EDC_CNT
1581 };
1582
1583 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1584 {
1585         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1586         struct amdgpu_ib ib;
1587         struct dma_fence *f = NULL;
1588         int r, i;
1589         u32 tmp;
1590         unsigned total_size, vgpr_offset, sgpr_offset;
1591         u64 gpu_addr;
1592
1593         /* only supported on CZ */
1594         if (adev->asic_type != CHIP_CARRIZO)
1595                 return 0;
1596
1597         /* bail if the compute ring is not ready */
1598         if (!ring->ready)
1599                 return 0;
1600
1601         tmp = RREG32(mmGB_EDC_MODE);
1602         WREG32(mmGB_EDC_MODE, 0);
1603
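             /*
              * IB sizing: each reg/value pair costs 3 dwords (SET_SH_REG
              * header, offset, value); each dispatch then adds 4 dwords
              * for COMPUTE_PGM_LO/HI, 5 for DISPATCH_DIRECT and 2 for the
              * EVENT_WRITE flush; times 4 bytes per dword.
              */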
1604         total_size =
1605                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1606         total_size +=
1607                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1608         total_size +=
1609                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1610         total_size = ALIGN(total_size, 256);
1611         vgpr_offset = total_size;
1612         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1613         sgpr_offset = total_size;
1614         total_size += sizeof(sgpr_init_compute_shader);
1615
1616         /* allocate an indirect buffer to put the commands in */
1617         memset(&ib, 0, sizeof(ib));
1618         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1619         if (r) {
1620                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1621                 return r;
1622         }
1623
1624         /* load the compute shaders */
1625         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1626                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1627
1628         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1629                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1630
1631         /* init the ib length to 0 */
1632         ib.length_dw = 0;
1633
1634         /* VGPR */
1635         /* write the register state for the compute dispatch */
1636         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1637                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1638                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1639                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1640         }
1641         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1642         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1643         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1644         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1645         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1646         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1647
1648         /* write dispatch packet */
1649         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1650         ib.ptr[ib.length_dw++] = 8; /* x */
1651         ib.ptr[ib.length_dw++] = 1; /* y */
1652         ib.ptr[ib.length_dw++] = 1; /* z */
1653         ib.ptr[ib.length_dw++] =
1654                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1655
1656         /* write CS partial flush packet */
1657         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1658         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1659
1660         /* SGPR1 */
1661         /* write the register state for the compute dispatch */
1662         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1663                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1664                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1665                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1666         }
1667         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1668         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1669         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1670         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1671         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1672         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1673
1674         /* write dispatch packet */
1675         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1676         ib.ptr[ib.length_dw++] = 8; /* x */
1677         ib.ptr[ib.length_dw++] = 1; /* y */
1678         ib.ptr[ib.length_dw++] = 1; /* z */
1679         ib.ptr[ib.length_dw++] =
1680                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1681
1682         /* write CS partial flush packet */
1683         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1684         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1685
1686         /* SGPR2 */
1687         /* write the register state for the compute dispatch */
1688         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1689                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1690                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1691                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1692         }
1693         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1694         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1695         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1696         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1697         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1698         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1699
1700         /* write dispatch packet */
1701         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1702         ib.ptr[ib.length_dw++] = 8; /* x */
1703         ib.ptr[ib.length_dw++] = 1; /* y */
1704         ib.ptr[ib.length_dw++] = 1; /* z */
1705         ib.ptr[ib.length_dw++] =
1706                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1707
1708         /* write CS partial flush packet */
1709         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1710         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1711
1712         /* schedule the ib on the ring */
1713         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1714         if (r) {
1715                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1716                 goto fail;
1717         }
1718
1719         /* wait for the GPU to finish processing the IB */
1720         r = dma_fence_wait(f, false);
1721         if (r) {
1722                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1723                 goto fail;
1724         }
1725
1726         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1727         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1728         WREG32(mmGB_EDC_MODE, tmp);
1729
1730         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1731         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1732         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1733
1735         /* read back registers to clear the counters */
1736         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1737                 RREG32(sec_ded_counter_registers[i]);
1738
1739 fail:
1740         amdgpu_ib_free(adev, &ib, NULL);
1741         dma_fence_put(f);
1742
1743         return r;
1744 }
1745
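     /*
      * Fill adev->gfx.config with the per-ASIC shader engine / CU / RB
      * topology, read the memory controller configuration to derive the
      * DRAM row size, and patch ROW_SIZE into the golden gb_addr_config
      * value.
      */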
1746 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1747 {
1748         u32 gb_addr_config;
1749         u32 mc_shared_chmap, mc_arb_ramcfg;
1750         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1751         u32 tmp;
1752         int ret;
1753
1754         switch (adev->asic_type) {
1755         case CHIP_TOPAZ:
1756                 adev->gfx.config.max_shader_engines = 1;
1757                 adev->gfx.config.max_tile_pipes = 2;
1758                 adev->gfx.config.max_cu_per_sh = 6;
1759                 adev->gfx.config.max_sh_per_se = 1;
1760                 adev->gfx.config.max_backends_per_se = 2;
1761                 adev->gfx.config.max_texture_channel_caches = 2;
1762                 adev->gfx.config.max_gprs = 256;
1763                 adev->gfx.config.max_gs_threads = 32;
1764                 adev->gfx.config.max_hw_contexts = 8;
1765
1766                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1767                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1768                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1769                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1770                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1771                 break;
1772         case CHIP_FIJI:
1773                 adev->gfx.config.max_shader_engines = 4;
1774                 adev->gfx.config.max_tile_pipes = 16;
1775                 adev->gfx.config.max_cu_per_sh = 16;
1776                 adev->gfx.config.max_sh_per_se = 1;
1777                 adev->gfx.config.max_backends_per_se = 4;
1778                 adev->gfx.config.max_texture_channel_caches = 16;
1779                 adev->gfx.config.max_gprs = 256;
1780                 adev->gfx.config.max_gs_threads = 32;
1781                 adev->gfx.config.max_hw_contexts = 8;
1782
1783                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1784                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1785                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1786                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1787                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1788                 break;
1789         case CHIP_POLARIS11:
1790         case CHIP_POLARIS12:
1791                 ret = amdgpu_atombios_get_gfx_info(adev);
1792                 if (ret)
1793                         return ret;
1794                 adev->gfx.config.max_gprs = 256;
1795                 adev->gfx.config.max_gs_threads = 32;
1796                 adev->gfx.config.max_hw_contexts = 8;
1797
1798                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1799                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1800                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1801                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1802                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1803                 break;
1804         case CHIP_POLARIS10:
1805                 ret = amdgpu_atombios_get_gfx_info(adev);
1806                 if (ret)
1807                         return ret;
1808                 adev->gfx.config.max_gprs = 256;
1809                 adev->gfx.config.max_gs_threads = 32;
1810                 adev->gfx.config.max_hw_contexts = 8;
1811
1812                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1813                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1814                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1815                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1816                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1817                 break;
1818         case CHIP_TONGA:
1819                 adev->gfx.config.max_shader_engines = 4;
1820                 adev->gfx.config.max_tile_pipes = 8;
1821                 adev->gfx.config.max_cu_per_sh = 8;
1822                 adev->gfx.config.max_sh_per_se = 1;
1823                 adev->gfx.config.max_backends_per_se = 2;
1824                 adev->gfx.config.max_texture_channel_caches = 8;
1825                 adev->gfx.config.max_gprs = 256;
1826                 adev->gfx.config.max_gs_threads = 32;
1827                 adev->gfx.config.max_hw_contexts = 8;
1828
1829                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1830                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1831                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1832                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1833                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1834                 break;
1835         case CHIP_CARRIZO:
1836                 adev->gfx.config.max_shader_engines = 1;
1837                 adev->gfx.config.max_tile_pipes = 2;
1838                 adev->gfx.config.max_sh_per_se = 1;
1839                 adev->gfx.config.max_backends_per_se = 2;
1840                 adev->gfx.config.max_cu_per_sh = 8;
1841                 adev->gfx.config.max_texture_channel_caches = 2;
1842                 adev->gfx.config.max_gprs = 256;
1843                 adev->gfx.config.max_gs_threads = 32;
1844                 adev->gfx.config.max_hw_contexts = 8;
1845
1846                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1847                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1848                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1849                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1850                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1851                 break;
1852         case CHIP_STONEY:
1853                 adev->gfx.config.max_shader_engines = 1;
1854                 adev->gfx.config.max_tile_pipes = 2;
1855                 adev->gfx.config.max_sh_per_se = 1;
1856                 adev->gfx.config.max_backends_per_se = 1;
1857                 adev->gfx.config.max_cu_per_sh = 3;
1858                 adev->gfx.config.max_texture_channel_caches = 2;
1859                 adev->gfx.config.max_gprs = 256;
1860                 adev->gfx.config.max_gs_threads = 16;
1861                 adev->gfx.config.max_hw_contexts = 8;
1862
1863                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1864                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1865                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1866                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1867                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1868                 break;
1869         default:
1870                 adev->gfx.config.max_shader_engines = 2;
1871                 adev->gfx.config.max_tile_pipes = 4;
1872                 adev->gfx.config.max_cu_per_sh = 2;
1873                 adev->gfx.config.max_sh_per_se = 1;
1874                 adev->gfx.config.max_backends_per_se = 2;
1875                 adev->gfx.config.max_texture_channel_caches = 4;
1876                 adev->gfx.config.max_gprs = 256;
1877                 adev->gfx.config.max_gs_threads = 32;
1878                 adev->gfx.config.max_hw_contexts = 8;
1879
1880                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1881                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1882                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1883                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1884                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1885                 break;
1886         }
1887
1888         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1889         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1890         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1891
1892         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1893         adev->gfx.config.mem_max_burst_length_bytes = 256;
1894         if (adev->flags & AMD_IS_APU) {
1895                 /* Get memory bank mapping mode. */
1896                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1897                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1898                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1899
1900                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1901                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1902                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1903
1904                 /* Validate settings in case only one DIMM is installed. */
1905                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1906                         dimm00_addr_map = 0;
1907                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1908                         dimm01_addr_map = 0;
1909                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1910                         dimm10_addr_map = 0;
1911                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1912                         dimm11_addr_map = 0;
1913
1914                 /* If the DIMM address map is 8GB, the row size should be 2KB, otherwise 1KB. */
1915                 /* If ROW size(DIMM1) != ROW size(DIMM0), use the larger row size. */
1916                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1917                         adev->gfx.config.mem_row_size_in_kb = 2;
1918                 else
1919                         adev->gfx.config.mem_row_size_in_kb = 1;
1920         } else {
1921                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1922                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1923                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1924                         adev->gfx.config.mem_row_size_in_kb = 4;
1925         }
1926
1927         adev->gfx.config.shader_engine_tile_size = 32;
1928         adev->gfx.config.num_gpus = 1;
1929         adev->gfx.config.multi_gpu_tile_size = 64;
1930
1931         /* fix up row size */
1932         switch (adev->gfx.config.mem_row_size_in_kb) {
1933         case 1:
1934         default:
1935                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1936                 break;
1937         case 2:
1938                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1939                 break;
1940         case 4:
1941                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1942                 break;
1943         }
1944         adev->gfx.config.gb_addr_config = gb_addr_config;
1945
1946         return 0;
1947 }
1948
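     /*
      * Initialize one compute ring: doorbell index AMDGPU_DOORBELL_MEC_RING0
      * + ring_id, an EOP address carved out of the shared HPD BO at
      * ring_id * GFX8_MEC_HPD_SIZE, and an EOP interrupt source selected
      * by (me, pipe).
      */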
1949 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1950                                         int mec, int pipe, int queue)
1951 {
1952         int r;
1953         unsigned irq_type;
1954         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1957
1958         /* mec0 is me1 */
1959         ring->me = mec + 1;
1960         ring->pipe = pipe;
1961         ring->queue = queue;
1962
1963         ring->ring_obj = NULL;
1964         ring->use_doorbell = true;
1965         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1966         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1967                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1968         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1969
1970         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1971                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1972                 + ring->pipe;
1973
1974         /* type-2 packets are deprecated on MEC, use type-3 instead */
1975         r = amdgpu_ring_init(adev, ring, 1024,
1976                         &adev->gfx.eop_irq, irq_type);
1977         if (r)
1978                 return r;
1979
1981         return 0;
1982 }
1983
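     /*
      * sw_init: register the KIQ/EOP/priv-reg/priv-inst interrupt sources,
      * load the microcode, set up the RLC and MEC BOs, create the gfx and
      * compute rings plus the KIQ ring and MQDs, and reserve the GDS, GWS
      * and OA partitions.
      */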
1984 static int gfx_v8_0_sw_init(void *handle)
1985 {
1986         int i, j, k, r, ring_id;
1987         struct amdgpu_ring *ring;
1988         struct amdgpu_kiq *kiq;
1989         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1990
1991         switch (adev->asic_type) {
1992         case CHIP_FIJI:
1993         case CHIP_TONGA:
1994         case CHIP_POLARIS11:
1995         case CHIP_POLARIS12:
1996         case CHIP_POLARIS10:
1997         case CHIP_CARRIZO:
1998                 adev->gfx.mec.num_mec = 2;
1999                 break;
2000         case CHIP_TOPAZ:
2001         case CHIP_STONEY:
2002         default:
2003                 adev->gfx.mec.num_mec = 1;
2004                 break;
2005         }
2006
2007         adev->gfx.mec.num_pipe_per_mec = 4;
2008         adev->gfx.mec.num_queue_per_pipe = 8;
2009
2010         /* KIQ event */
2011         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
2012         if (r)
2013                 return r;
2014
2015         /* EOP Event */
2016         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
2017         if (r)
2018                 return r;
2019
2020         /* Privileged reg */
2021         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2022                               &adev->gfx.priv_reg_irq);
2023         if (r)
2024                 return r;
2025
2026         /* Privileged inst */
2027         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2028                               &adev->gfx.priv_inst_irq);
2029         if (r)
2030                 return r;
2031
2032         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2033
2034         gfx_v8_0_scratch_init(adev);
2035
2036         r = gfx_v8_0_init_microcode(adev);
2037         if (r) {
2038                 DRM_ERROR("Failed to load gfx firmware!\n");
2039                 return r;
2040         }
2041
2042         r = gfx_v8_0_rlc_init(adev);
2043         if (r) {
2044                 DRM_ERROR("Failed to init rlc BOs!\n");
2045                 return r;
2046         }
2047
2048         r = gfx_v8_0_mec_init(adev);
2049         if (r) {
2050                 DRM_ERROR("Failed to init MEC BOs!\n");
2051                 return r;
2052         }
2053
2054         /* set up the gfx ring */
2055         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2056                 ring = &adev->gfx.gfx_ring[i];
2057                 ring->ring_obj = NULL;
2058                 sprintf(ring->name, "gfx");
2059                 /* no gfx doorbells on iceland */
2060                 if (adev->asic_type != CHIP_TOPAZ) {
2061                         ring->use_doorbell = true;
2062                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2063                 }
2064
2065                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2066                                      AMDGPU_CP_IRQ_GFX_EOP);
2067                 if (r)
2068                         return r;
2069         }
2070
2072         /* set up the compute queues - iterate the pipes in the innermost loop so rings are allocated horizontally across pipes */
2073         ring_id = 0;
2074         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2075                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2076                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2077                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2078                                         continue;
2079
2080                                 r = gfx_v8_0_compute_ring_init(adev,
2081                                                                 ring_id,
2082                                                                 i, k, j);
2083                                 if (r)
2084                                         return r;
2085
2086                                 ring_id++;
2087                         }
2088                 }
2089         }
2090
2091         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2092         if (r) {
2093                 DRM_ERROR("Failed to init KIQ BOs!\n");
2094                 return r;
2095         }
2096
2097         kiq = &adev->gfx.kiq;
2098         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2099         if (r)
2100                 return r;
2101
2102         /* create MQDs for all compute queues, as well as the KIQ for the SR-IOV case */
2103         r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd));
2104         if (r)
2105                 return r;
2106
2107         /* reserve GDS, GWS and OA resources for gfx */
2108         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2109                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2110                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2111         if (r)
2112                 return r;
2113
2114         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2115                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2116                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2117         if (r)
2118                 return r;
2119
2120         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2121                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2122                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2123         if (r)
2124                 return r;
2125
2126         adev->gfx.ce_ram_size = 0x8000;
2127
2128         r = gfx_v8_0_gpu_early_init(adev);
2129         if (r)
2130                 return r;
2131
2132         return 0;
2133 }
2134
2135 static int gfx_v8_0_sw_fini(void *handle)
2136 {
2137         int i;
2138         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2139
2140         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2141         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2142         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2143
2144         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2145                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2146         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2147                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2148
2149         amdgpu_gfx_compute_mqd_sw_fini(adev);
2150         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2151         amdgpu_gfx_kiq_fini(adev);
2152
2153         gfx_v8_0_mec_fini(adev);
2154         gfx_v8_0_rlc_fini(adev);
2155         gfx_v8_0_free_microcode(adev);
2156
2157         return 0;
2158 }
2159
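     /*
      * Program the GB_TILE_MODE0-31 and GB_MACROTILE_MODE0-15 tables with
      * per-ASIC tiling parameters.  On Topaz, tile-mode indices 7, 12, 17
      * and 23 (and macrotile index 7) are skipped and left at their reset
      * values.
      */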
2160 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2161 {
2162         uint32_t *modearray, *mod2array;
2163         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2164         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2165         u32 reg_offset;
2166
2167         modearray = adev->gfx.config.tile_mode_array;
2168         mod2array = adev->gfx.config.macrotile_mode_array;
2169
2170         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2171                 modearray[reg_offset] = 0;
2172
2173         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2174                 mod2array[reg_offset] = 0;
2175
2176         switch (adev->asic_type) {
2177         case CHIP_TOPAZ:
2178                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2179                                 PIPE_CONFIG(ADDR_SURF_P2) |
2180                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2181                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2182                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2183                                 PIPE_CONFIG(ADDR_SURF_P2) |
2184                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2185                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2186                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2187                                 PIPE_CONFIG(ADDR_SURF_P2) |
2188                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2189                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2190                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2191                                 PIPE_CONFIG(ADDR_SURF_P2) |
2192                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2193                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2194                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2195                                 PIPE_CONFIG(ADDR_SURF_P2) |
2196                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2197                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2198                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2199                                 PIPE_CONFIG(ADDR_SURF_P2) |
2200                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2201                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2202                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2203                                 PIPE_CONFIG(ADDR_SURF_P2) |
2204                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2205                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2206                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2207                                 PIPE_CONFIG(ADDR_SURF_P2));
2208                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2209                                 PIPE_CONFIG(ADDR_SURF_P2) |
2210                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2211                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2212                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2213                                  PIPE_CONFIG(ADDR_SURF_P2) |
2214                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2215                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2216                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2217                                  PIPE_CONFIG(ADDR_SURF_P2) |
2218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2220                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2221                                  PIPE_CONFIG(ADDR_SURF_P2) |
2222                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2223                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2224                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225                                  PIPE_CONFIG(ADDR_SURF_P2) |
2226                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2227                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2228                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2229                                  PIPE_CONFIG(ADDR_SURF_P2) |
2230                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2231                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2232                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2233                                  PIPE_CONFIG(ADDR_SURF_P2) |
2234                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2235                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2236                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2237                                  PIPE_CONFIG(ADDR_SURF_P2) |
2238                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2239                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2240                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2241                                  PIPE_CONFIG(ADDR_SURF_P2) |
2242                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2243                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2244                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2245                                  PIPE_CONFIG(ADDR_SURF_P2) |
2246                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2247                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2248                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2249                                  PIPE_CONFIG(ADDR_SURF_P2) |
2250                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2251                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2252                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2253                                  PIPE_CONFIG(ADDR_SURF_P2) |
2254                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2255                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2256                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2257                                  PIPE_CONFIG(ADDR_SURF_P2) |
2258                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2259                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2260                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2261                                  PIPE_CONFIG(ADDR_SURF_P2) |
2262                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2263                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2264                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2265                                  PIPE_CONFIG(ADDR_SURF_P2) |
2266                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2267                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2268                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2269                                  PIPE_CONFIG(ADDR_SURF_P2) |
2270                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2271                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2272                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273                                  PIPE_CONFIG(ADDR_SURF_P2) |
2274                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2275                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2277                                  PIPE_CONFIG(ADDR_SURF_P2) |
2278                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2279                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2280
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

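		/* indices 7, 12, 17 and 23 of the tile table are not written
		 * on this part */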
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
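	/* Fiji: mostly the 16-pipe P16_32x32_16x16 config; a few PRT entries
	 * fall back to P4_16x16 */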
	case CHIP_FIJI:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

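		/* Fiji bank settings; entry 7 is again left unset */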
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

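		/* Fiji writes the whole tile-mode table; only macrotile
		 * entry 7 is skipped */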
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
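	/* Tonga: same tile-mode layout as Fiji, with the 8-pipe
	 * P8_32x32_16x16 config */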
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

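		/* Tonga bank settings: 16 banks for most entries, 8 or 4
		 * banks for mod2array[12]..[14]; entry 7 is skipped */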
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
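	/* Polaris11 and Polaris12 share one table, built entirely on the
	 * 4-pipe P4_16x16 config */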
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

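		/* Polaris11/12 bank settings; entry 7 is left unset and
		 * skipped by the write loop */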
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
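	/* Polaris10: 8-pipe P8_32x32_16x16 config; these tables match
	 * Tonga's above */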
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

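		/* Polaris10 bank settings; entry 7 is again skipped */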
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

3123                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3124                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3125
3126                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3127                         if (reg_offset != 7)
3128                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3129
3130                 break;
3131         case CHIP_STONEY:
3132                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3133                                 PIPE_CONFIG(ADDR_SURF_P2) |
3134                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3135                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3136                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3137                                 PIPE_CONFIG(ADDR_SURF_P2) |
3138                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3139                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3140                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3141                                 PIPE_CONFIG(ADDR_SURF_P2) |
3142                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3143                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3144                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3145                                 PIPE_CONFIG(ADDR_SURF_P2) |
3146                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3147                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3148                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3149                                 PIPE_CONFIG(ADDR_SURF_P2) |
3150                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3151                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3152                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3153                                 PIPE_CONFIG(ADDR_SURF_P2) |
3154                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3155                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3156                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3157                                 PIPE_CONFIG(ADDR_SURF_P2) |
3158                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3159                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3160                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3161                                 PIPE_CONFIG(ADDR_SURF_P2));
3162                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3163                                 PIPE_CONFIG(ADDR_SURF_P2) |
3164                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3165                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3166                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3167                                  PIPE_CONFIG(ADDR_SURF_P2) |
3168                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3169                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3170                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3171                                  PIPE_CONFIG(ADDR_SURF_P2) |
3172                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3173                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3174                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3175                                  PIPE_CONFIG(ADDR_SURF_P2) |
3176                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3177                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3178                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3179                                  PIPE_CONFIG(ADDR_SURF_P2) |
3180                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3181                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3182                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3183                                  PIPE_CONFIG(ADDR_SURF_P2) |
3184                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3185                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3186                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3187                                  PIPE_CONFIG(ADDR_SURF_P2) |
3188                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3189                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3190                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3191                                  PIPE_CONFIG(ADDR_SURF_P2) |
3192                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3193                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3194                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3195                                  PIPE_CONFIG(ADDR_SURF_P2) |
3196                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3197                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3198                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3199                                  PIPE_CONFIG(ADDR_SURF_P2) |
3200                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3201                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3202                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3203                                  PIPE_CONFIG(ADDR_SURF_P2) |
3204                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3205                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3206                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3207                                  PIPE_CONFIG(ADDR_SURF_P2) |
3208                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3209                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3210                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3211                                  PIPE_CONFIG(ADDR_SURF_P2) |
3212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3214                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3215                                  PIPE_CONFIG(ADDR_SURF_P2) |
3216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3218                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3219                                  PIPE_CONFIG(ADDR_SURF_P2) |
3220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3222                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3223                                  PIPE_CONFIG(ADDR_SURF_P2) |
3224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3226                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3227                                  PIPE_CONFIG(ADDR_SURF_P2) |
3228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3230                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3231                                  PIPE_CONFIG(ADDR_SURF_P2) |
3232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3234
3235                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3236                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3237                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3238                                 NUM_BANKS(ADDR_SURF_8_BANK));
3239                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3240                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3241                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3242                                 NUM_BANKS(ADDR_SURF_8_BANK));
3243                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3244                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3245                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3246                                 NUM_BANKS(ADDR_SURF_8_BANK));
3247                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3248                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3249                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3250                                 NUM_BANKS(ADDR_SURF_8_BANK));
3251                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3252                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3253                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3254                                 NUM_BANKS(ADDR_SURF_8_BANK));
3255                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3256                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3257                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3258                                 NUM_BANKS(ADDR_SURF_8_BANK));
3259                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3260                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3261                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3262                                 NUM_BANKS(ADDR_SURF_8_BANK));
3263                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3264                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3265                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3266                                 NUM_BANKS(ADDR_SURF_16_BANK));
3267                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3268                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3269                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3270                                 NUM_BANKS(ADDR_SURF_16_BANK));
3271                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3272                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3273                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3274                                  NUM_BANKS(ADDR_SURF_16_BANK));
3275                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3276                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3277                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3278                                  NUM_BANKS(ADDR_SURF_16_BANK));
3279                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3280                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3281                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3282                                  NUM_BANKS(ADDR_SURF_16_BANK));
3283                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3284                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3285                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3286                                  NUM_BANKS(ADDR_SURF_16_BANK));
3287                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3288                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3289                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3290                                  NUM_BANKS(ADDR_SURF_8_BANK));
3291
3292                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3293                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3294                             reg_offset != 23)
3295                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3296
3297                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3298                         if (reg_offset != 7)
3299                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3300
3301                 break;
3302         default:
3303                 dev_warn(adev->dev,
3304                          "Unknown chip type (%d) in gfx_v8_0_tiling_mode_table_init(), falling through to CHIP_CARRIZO\n",
3305                          adev->asic_type);
3306
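                /* fall through */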
3307         case CHIP_CARRIZO:
3308                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3309                                 PIPE_CONFIG(ADDR_SURF_P2) |
3310                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3311                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3312                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3313                                 PIPE_CONFIG(ADDR_SURF_P2) |
3314                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3315                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3316                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3317                                 PIPE_CONFIG(ADDR_SURF_P2) |
3318                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3319                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3320                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3321                                 PIPE_CONFIG(ADDR_SURF_P2) |
3322                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3323                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3324                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3325                                 PIPE_CONFIG(ADDR_SURF_P2) |
3326                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3327                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3328                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3329                                 PIPE_CONFIG(ADDR_SURF_P2) |
3330                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3331                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3332                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3333                                 PIPE_CONFIG(ADDR_SURF_P2) |
3334                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3335                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3336                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3337                                 PIPE_CONFIG(ADDR_SURF_P2));
3338                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3339                                 PIPE_CONFIG(ADDR_SURF_P2) |
3340                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3341                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3342                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3343                                  PIPE_CONFIG(ADDR_SURF_P2) |
3344                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3345                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3346                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3347                                  PIPE_CONFIG(ADDR_SURF_P2) |
3348                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3349                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3350                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3351                                  PIPE_CONFIG(ADDR_SURF_P2) |
3352                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3353                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3354                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3355                                  PIPE_CONFIG(ADDR_SURF_P2) |
3356                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3357                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3358                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3359                                  PIPE_CONFIG(ADDR_SURF_P2) |
3360                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3361                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3362                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3363                                  PIPE_CONFIG(ADDR_SURF_P2) |
3364                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3365                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3366                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3367                                  PIPE_CONFIG(ADDR_SURF_P2) |
3368                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3369                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3370                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3371                                  PIPE_CONFIG(ADDR_SURF_P2) |
3372                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3373                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3374                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3375                                  PIPE_CONFIG(ADDR_SURF_P2) |
3376                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3377                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3378                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3379                                  PIPE_CONFIG(ADDR_SURF_P2) |
3380                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3381                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3382                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3383                                  PIPE_CONFIG(ADDR_SURF_P2) |
3384                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3385                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3386                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3387                                  PIPE_CONFIG(ADDR_SURF_P2) |
3388                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3389                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3390                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3391                                  PIPE_CONFIG(ADDR_SURF_P2) |
3392                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3393                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3394                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3395                                  PIPE_CONFIG(ADDR_SURF_P2) |
3396                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3397                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3398                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3399                                  PIPE_CONFIG(ADDR_SURF_P2) |
3400                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3401                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3402                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3403                                  PIPE_CONFIG(ADDR_SURF_P2) |
3404                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3405                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3406                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3407                                  PIPE_CONFIG(ADDR_SURF_P2) |
3408                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3409                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3410
3411                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3412                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3413                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3414                                 NUM_BANKS(ADDR_SURF_8_BANK));
3415                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3416                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3417                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3418                                 NUM_BANKS(ADDR_SURF_8_BANK));
3419                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3420                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3421                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3422                                 NUM_BANKS(ADDR_SURF_8_BANK));
3423                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3424                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3425                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3426                                 NUM_BANKS(ADDR_SURF_8_BANK));
3427                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3428                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3429                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3430                                 NUM_BANKS(ADDR_SURF_8_BANK));
3431                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3432                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3433                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3434                                 NUM_BANKS(ADDR_SURF_8_BANK));
3435                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3436                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3437                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3438                                 NUM_BANKS(ADDR_SURF_8_BANK));
3439                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3440                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3441                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3442                                 NUM_BANKS(ADDR_SURF_16_BANK));
3443                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3444                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3445                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3446                                 NUM_BANKS(ADDR_SURF_16_BANK));
3447                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3448                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3449                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3450                                  NUM_BANKS(ADDR_SURF_16_BANK));
3451                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3452                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3453                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3454                                  NUM_BANKS(ADDR_SURF_16_BANK));
3455                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3456                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3457                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3458                                  NUM_BANKS(ADDR_SURF_16_BANK));
3459                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3460                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3461                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3462                                  NUM_BANKS(ADDR_SURF_16_BANK));
3463                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3464                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3465                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3466                                  NUM_BANKS(ADDR_SURF_8_BANK));
3467
3468                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3469                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3470                             reg_offset != 23)
3471                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3472
3473                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3474                         if (reg_offset != 7)
3475                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3476
3477                 break;
3478         }
3479 }
3480
3481 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3482                                   u32 se_num, u32 sh_num, u32 instance)
3483 {
3484         u32 data;
3485
3486         if (instance == 0xffffffff)
3487                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3488         else
3489                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3490
3491         if (se_num == 0xffffffff)
3492                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3493         else
3494                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3495
3496         if (sh_num == 0xffffffff)
3497                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3498         else
3499                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3500
3501         WREG32(mmGRBM_GFX_INDEX, data);
3502 }
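/*
 * Usage sketch (hypothetical caller): 0xffffffff acts as a broadcast
 * token for each index.  Holding adev->grbm_idx_mutex, a caller would
 * typically do:
 *
 *     gfx_v8_0_select_se_sh(adev, se, sh, 0xffffffff);   // one SE/SH
 *     ...access per-SE/SH registers...
 *     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 *
 * restoring broadcast mode so later writes reach every shader engine.
 */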
3503
3504 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3505 {
3506         u32 data, mask;
3507
3508         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3509                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3510
3511         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3512
3513         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3514                                          adev->gfx.config.max_sh_per_se);
3515
3516         return (~data) & mask;
3517 }
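/*
 * Worked example, assuming a 4-RB, 1-SH part: the DISABLE registers
 * carry a 1 bit per disabled backend, so with max_backends_per_se = 4
 * and max_sh_per_se = 1 the mask is 0xF, and data = 0x2 (RB1
 * harvested) yields (~0x2) & 0xF = 0xD, i.e. RB0, RB2 and RB3 active.
 * amdgpu_gfx_create_bitmask() is assumed to produce the usual
 * (1 << width) - 1 pattern.
 */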
3518
3519 static void
3520 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3521 {
3522         switch (adev->asic_type) {
3523         case CHIP_FIJI:
3524                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3525                           RB_XSEL2(1) | PKR_MAP(2) |
3526                           PKR_XSEL(1) | PKR_YSEL(1) |
3527                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3528                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3529                            SE_PAIR_YSEL(2);
3530                 break;
3531         case CHIP_TONGA:
3532         case CHIP_POLARIS10:
3533                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3534                           SE_XSEL(1) | SE_YSEL(1);
3535                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3536                            SE_PAIR_YSEL(2);
3537                 break;
3538         case CHIP_TOPAZ:
3539         case CHIP_CARRIZO:
3540                 *rconf |= RB_MAP_PKR0(2);
3541                 *rconf1 |= 0x0;
3542                 break;
3543         case CHIP_POLARIS11:
3544         case CHIP_POLARIS12:
3545                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3546                           SE_XSEL(1) | SE_YSEL(1);
3547                 *rconf1 |= 0x0;
3548                 break;
3549         case CHIP_STONEY:
3550                 *rconf |= 0x0;
3551                 *rconf1 |= 0x0;
3552                 break;
3553         default:
3554                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3555                 break;
3556         }
3557 }
3558
3559 static void
3560 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3561                                         u32 raster_config, u32 raster_config_1,
3562                                         unsigned rb_mask, unsigned num_rb)
3563 {
3564         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3565         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3566         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3567         unsigned rb_per_se = num_rb / num_se;
3568         unsigned se_mask[4];
3569         unsigned se;
3570
3571         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3572         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3573         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3574         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
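        /*
         * Illustrative configuration: with num_rb = 8 and num_se = 4,
         * rb_per_se = 2 and the four windows above are 0x03, 0x0c,
         * 0x30 and 0xc0, each ANDed with rb_mask, so a fully
         * harvested SE shows up as an empty se_mask entry.
         */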
3575
3576         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3577         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3578         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3579
3580         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3581                              (!se_mask[2] && !se_mask[3]))) {
3582                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3583
3584                 if (!se_mask[0] && !se_mask[1]) {
3585                         raster_config_1 |=
3586                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3587                 } else {
3588                         raster_config_1 |=
3589                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3590                 }
3591         }
3592
3593         for (se = 0; se < num_se; se++) {
3594                 unsigned raster_config_se = raster_config;
3595                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3596                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3597                 int idx = (se / 2) * 2;
3598
3599                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3600                         raster_config_se &= ~SE_MAP_MASK;
3601
3602                         if (!se_mask[idx]) {
3603                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3604                         } else {
3605                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3606                         }
3607                 }
3608
3609                 pkr0_mask &= rb_mask;
3610                 pkr1_mask &= rb_mask;
3611                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3612                         raster_config_se &= ~PKR_MAP_MASK;
3613
3614                         if (!pkr0_mask) {
3615                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3616                         } else {
3617                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3618                         }
3619                 }
3620
3621                 if (rb_per_se >= 2) {
3622                         unsigned rb0_mask = 1 << (se * rb_per_se);
3623                         unsigned rb1_mask = rb0_mask << 1;
3624
3625                         rb0_mask &= rb_mask;
3626                         rb1_mask &= rb_mask;
3627                         if (!rb0_mask || !rb1_mask) {
3628                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3629
3630                                 if (!rb0_mask) {
3631                                         raster_config_se |=
3632                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3633                                 } else {
3634                                         raster_config_se |=
3635                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3636                                 }
3637                         }
3638
3639                         if (rb_per_se > 2) {
3640                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3641                                 rb1_mask = rb0_mask << 1;
3642                                 rb0_mask &= rb_mask;
3643                                 rb1_mask &= rb_mask;
3644                                 if (!rb0_mask || !rb1_mask) {
3645                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3646
3647                                         if (!rb0_mask) {
3648                                                 raster_config_se |=
3649                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3650                                         } else {
3651                                                 raster_config_se |=
3652                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3653                                         }
3654                                 }
3655                         }
3656                 }
3657
3658                 /* GRBM_GFX_INDEX has a different offset on VI */
3659                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3660                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3661                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3662         }
3663
3664         /* GRBM_GFX_INDEX has a different offset on VI */
3665         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3666 }
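/*
 * The per-SE rewrite above is needed because the golden raster_config
 * values assume a fully populated chip; when rb_mask reports
 * harvested RBs, the SE/PKR/RB map fields are redirected to a
 * surviving neighbour so the scan converter never targets a disabled
 * backend.  (This paraphrases the logic above, not documented
 * hardware behaviour.)
 */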
3667
3668 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3669 {
3670         int i, j;
3671         u32 data;
3672         u32 raster_config = 0, raster_config_1 = 0;
3673         u32 active_rbs = 0;
3674         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3675                                         adev->gfx.config.max_sh_per_se;
3676         unsigned num_rb_pipes;
3677
3678         mutex_lock(&adev->grbm_idx_mutex);
3679         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3680                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3681                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3682                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3683                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3684                                                rb_bitmap_width_per_sh);
3685                 }
3686         }
3687         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3688
3689         adev->gfx.config.backend_enable_mask = active_rbs;
3690         adev->gfx.config.num_rbs = hweight32(active_rbs);
3691
3692         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3693                              adev->gfx.config.max_shader_engines, 16);
3694
3695         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3696
3697         if (!adev->gfx.config.backend_enable_mask ||
3698                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3699                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3700                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3701         } else {
3702                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3703                                                         adev->gfx.config.backend_enable_mask,
3704                                                         num_rb_pipes);
3705         }
3706
3707         /* cache the values for userspace */
3708         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3709                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3710                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3711                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3712                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3713                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3714                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3715                         adev->gfx.config.rb_config[i][j].raster_config =
3716                                 RREG32(mmPA_SC_RASTER_CONFIG);
3717                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3718                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3719                 }
3720         }
3721         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3722         mutex_unlock(&adev->grbm_idx_mutex);
3723 }
3724
3725 /**
3726  * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
3727  *
3728  * @adev: amdgpu_device pointer
3729  *
3730  * Initialize compute vmid sh_mem registers
3731  *
3732  */
3733 #define DEFAULT_SH_MEM_BASES    (0x6000)
3734 #define FIRST_COMPUTE_VMID      (8)
3735 #define LAST_COMPUTE_VMID       (16)
3736 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3737 {
3738         int i;
3739         uint32_t sh_mem_config;
3740         uint32_t sh_mem_bases;
3741
3742         /*
3743          * Configure apertures:
3744          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3745          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3746          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3747          */
3748         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3749
3750         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3751                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3752                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3753                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3754                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3755                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3756
3757         mutex_lock(&adev->srbm_mutex);
3758         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3759                 vi_srbm_select(adev, 0, 0, 0, i);
3760                 /* CP and shaders */
3761                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3762                 WREG32(mmSH_MEM_APE1_BASE, 1);
3763                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3764                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3765         }
3766         vi_srbm_select(adev, 0, 0, 0, 0);
3767         mutex_unlock(&adev->srbm_mutex);
3768 }
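/*
 * Worked encoding, assuming the usual SH_MEM_BASES layout of two
 * 16-bit base fields scaled by 2^48: DEFAULT_SH_MEM_BASES gives
 * 0x6000 | (0x6000 << 16) = 0x60006000, so both the private and
 * shared apertures resolve to 0x6000'0000'00000000, matching the LDS
 * aperture in the comment above.  Writing APE1_BASE = 1 with
 * APE1_LIMIT = 0 leaves the APE1 aperture empty (base > limit).
 */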
3769
3770 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3771 {
3772         switch (adev->asic_type) {
3773         default:
3774                 adev->gfx.config.double_offchip_lds_buf = 1;
3775                 break;
3776         case CHIP_CARRIZO:
3777         case CHIP_STONEY:
3778                 adev->gfx.config.double_offchip_lds_buf = 0;
3779                 break;
3780         }
3781 }
3782
3783 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3784 {
3785         u32 tmp, sh_static_mem_cfg;
3786         int i;
3787
3788         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3789         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3790         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3791         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3792
3793         gfx_v8_0_tiling_mode_table_init(adev);
3794         gfx_v8_0_setup_rb(adev);
3795         gfx_v8_0_get_cu_info(adev);
3796         gfx_v8_0_config_init(adev);
3797
3798         /* XXX SH_MEM regs */
3799         /* where to put LDS, scratch, GPUVM in FSA64 space */
3800         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3801                                    SWIZZLE_ENABLE, 1);
3802         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3803                                    ELEMENT_SIZE, 1);
3804         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3805                                    INDEX_STRIDE, 3);
3806         mutex_lock(&adev->srbm_mutex);
3807         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3808                 vi_srbm_select(adev, 0, 0, 0, i);
3809                 /* CP and shaders */
3810                 if (i == 0) {
3811                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3812                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3813                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3814                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3815                         WREG32(mmSH_MEM_CONFIG, tmp);
3816                         WREG32(mmSH_MEM_BASES, 0);
3817                 } else {
3818                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3819                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3820                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3821                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3822                         WREG32(mmSH_MEM_CONFIG, tmp);
3823                         tmp = adev->mc.shared_aperture_start >> 48;
3824                         WREG32(mmSH_MEM_BASES, tmp);
3825                 }
3826
3827                 WREG32(mmSH_MEM_APE1_BASE, 1);
3828                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3829                 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3830         }
3831         vi_srbm_select(adev, 0, 0, 0, 0);
3832         mutex_unlock(&adev->srbm_mutex);
3833
3834         gfx_v8_0_init_compute_vmid(adev);
3835
3836         mutex_lock(&adev->grbm_idx_mutex);
3837         /*
3838          * make sure that the following register writes are broadcast
3839          * to all of the shaders
3840          */
3841         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3842
3843         WREG32(mmPA_SC_FIFO_SIZE,
3844                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3845                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3846                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3847                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3848                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3849                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3850                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3851                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3852
3853         tmp = RREG32(mmSPI_ARB_PRIORITY);
3854         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3855         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3856         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3857         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3858         WREG32(mmSPI_ARB_PRIORITY, tmp);
3859
3860         mutex_unlock(&adev->grbm_idx_mutex);
3862 }
3863
3864 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3865 {
3866         u32 i, j, k;
3867         u32 mask;
3868
3869         mutex_lock(&adev->grbm_idx_mutex);
3870         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3871                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3872                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3873                         for (k = 0; k < adev->usec_timeout; k++) {
3874                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3875                                         break;
3876                                 udelay(1);
3877                         }
3878                 }
3879         }
3880         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3881         mutex_unlock(&adev->grbm_idx_mutex);
3882
3883         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3884                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3885                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3886                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3887         for (k = 0; k < adev->usec_timeout; k++) {
3888                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3889                         break;
3890                 udelay(1);
3891         }
3892 }
3893
3894 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3895                                                bool enable)
3896 {
3897         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3898
3899         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3900         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3901         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3902         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3903
3904         WREG32(mmCP_INT_CNTL_RING0, tmp);
3905 }
3906
3907 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3908 {
3909         /* csib */
3910         WREG32(mmRLC_CSIB_ADDR_HI,
3911                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3912         WREG32(mmRLC_CSIB_ADDR_LO,
3913                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3914         WREG32(mmRLC_CSIB_LENGTH,
3915                         adev->gfx.rlc.clear_state_size);
3916 }
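/*
 * The clear-state buffer address is a 64-bit GPU VA split across two
 * 32-bit registers; masking the low half with 0xfffffffc keeps it
 * dword aligned.  For example, a hypothetical address 0x1'2345'6789
 * would program HI = 0x1 and LO = 0x23456788.
 */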
3917
3918 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3919                                 int ind_offset,
3920                                 int list_size,
3921                                 int *unique_indices,
3922                                 int *indices_count,
3923                                 int max_indices,
3924                                 int *ind_start_offsets,
3925                                 int *offset_count,
3926                                 int max_offset)
3927 {
3928         int indices;
3929         bool new_entry = true;
3930
3931         for (; ind_offset < list_size; ind_offset++) {
3932
3933                 if (new_entry) {
3934                         new_entry = false;
3935                         BUG_ON(*offset_count >= max_offset);
3936                         ind_start_offsets[*offset_count] = ind_offset;
3937                         *offset_count = *offset_count + 1;
3938                 }
3939
3940                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3941                         new_entry = true;
3942                         continue;
3943                 }
3944
3945                 ind_offset += 2;
3946
3947                 /* look for a matching index */
3948                 for (indices = 0;
3949                         indices < *indices_count;
3950                         indices++) {
3951                         if (unique_indices[indices] ==
3952                                 register_list_format[ind_offset])
3953                                 break;
3954                 }
3955
3956                 if (indices >= *indices_count) {
3957                         BUG_ON(*indices_count >= max_indices);
3958                         unique_indices[*indices_count] =
3959                                 register_list_format[ind_offset];
3960                         indices = *indices_count;
3961                         *indices_count = *indices_count + 1;
3962                 }
3963
3964                 register_list_format[ind_offset] = indices;
3965         }
3966 }
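/*
 * Format the parser above assumes: a stream of entries separated by
 * 0xFFFFFFFF sentinels, where the word two positions after the
 * current one holds an index value.  Each index is looked up in (or
 * appended to) unique_indices[] and rewritten in place with its
 * compact position, so the caller ends up with both a deduplicated
 * index table and a re-indexed list.
 */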
3967
3968 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3969 {
3970         int i, temp, data;
3971         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3972         int indices_count = 0;
3973         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3974         int offset_count = 0;
3975
3976         int list_size;
3977         unsigned int *register_list_format =
3978                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3979         if (!register_list_format)
3980                 return -ENOMEM;
3981         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3982                         adev->gfx.rlc.reg_list_format_size_bytes);
3983
3984         gfx_v8_0_parse_ind_reg_list(register_list_format,
3985                                 RLC_FormatDirectRegListLength,
3986                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3987                                 unique_indices,
3988                                 &indices_count,
3989                                 ARRAY_SIZE(unique_indices),
3990                                 indirect_start_offsets,
3991                                 &offset_count,
3992                                 ARRAY_SIZE(indirect_start_offsets));
3993
3994         /* save and restore list */
3995         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3996
3997         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3998         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3999                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4000
4001         /* indirect list */
4002         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4003         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4004                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4005
4006         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4007         list_size = list_size >> 1;
4008         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4009         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4010
4011         /* starting offsets */
4012         WREG32(mmRLC_GPM_SCRATCH_ADDR,
4013                 adev->gfx.rlc.starting_offsets_start);
4014         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4015                 WREG32(mmRLC_GPM_SCRATCH_DATA,
4016                                 indirect_start_offsets[i]);
4017
4018         /* unique indices */
4019         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4020         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4021         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4022                 if (unique_indices[i] != 0) {
4023                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4024                         WREG32(data + i, unique_indices[i] >> 20);
4025                 }
4026         }
4027         kfree(register_list_format);
4028
4029         return 0;
4030 }
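/*
 * Summary of the sequence above: the direct save/restore list goes to
 * RLC SRM ARAM, the re-indexed indirect list and its per-entry
 * starting offsets go to GPM scratch, and each non-zero unique index
 * is split across an INDEX_CNTL ADDR/DATA register pair (the 0x3FFFF
 * mask and >> 20 split mirror the code above, not documented register
 * fields).
 */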
4031
4032 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4033 {
4034         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4035 }
4036
4037 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4038 {
4039         uint32_t data;
4040
4041         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4042
4043         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4044         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4045         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4046         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4047         WREG32(mmRLC_PG_DELAY, data);
4048
4049         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4050         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4052 }
4053
4054 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4055                                                 bool enable)
4056 {
4057         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4058 }
4059
4060 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4061                                                   bool enable)
4062 {
4063         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4064 }
4065
4066 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4067 {
4068         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4069 }
4070
4071 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4072 {
4073         if ((adev->asic_type == CHIP_CARRIZO) ||
4074             (adev->asic_type == CHIP_STONEY)) {
4075                 gfx_v8_0_init_csb(adev);
4076                 gfx_v8_0_init_save_restore_list(adev);
4077                 gfx_v8_0_enable_save_restore_machine(adev);
4078                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4079                 gfx_v8_0_init_power_gating(adev);
4080                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4081         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4082                    (adev->asic_type == CHIP_POLARIS12)) {
4083                 gfx_v8_0_init_csb(adev);
4084                 gfx_v8_0_init_save_restore_list(adev);
4085                 gfx_v8_0_enable_save_restore_machine(adev);
4086                 gfx_v8_0_init_power_gating(adev);
4087         }
4089 }
4090
4091 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4092 {
4093         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4094
4095         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4096         gfx_v8_0_wait_for_rlc_serdes(adev);
4097 }
4098
4099 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4100 {
4101         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4102         udelay(50);
4103
4104         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4105         udelay(50);
4106 }
4107
4108 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4109 {
4110         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4111
4112         /* APUs (e.g. Carrizo) enable the CP interrupt only after the CP is initialized */
4113         if (!(adev->flags & AMD_IS_APU))
4114                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4115
4116         udelay(50);
4117 }
4118
4119 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4120 {
4121         const struct rlc_firmware_header_v2_0 *hdr;
4122         const __le32 *fw_data;
4123         unsigned i, fw_size;
4124
4125         if (!adev->gfx.rlc_fw)
4126                 return -EINVAL;
4127
4128         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4129         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4130
4131         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4132                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4133         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4134
4135         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4136         for (i = 0; i < fw_size; i++)
4137                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4138         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4139
4140         return 0;
4141 }
4142
4143 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4144 {
4145         int r;
4146         u32 tmp;
4147
4148         gfx_v8_0_rlc_stop(adev);
4149
4150         /* disable CG */
4151         tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4152         tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4153                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4154         WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4155         if (adev->asic_type == CHIP_POLARIS11 ||
4156             adev->asic_type == CHIP_POLARIS10 ||
4157             adev->asic_type == CHIP_POLARIS12) {
4158                 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4159                 tmp &= ~0x3;
4160                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4161         }
4162
4163         /* disable PG */
4164         WREG32(mmRLC_PG_CNTL, 0);
4165
4166         gfx_v8_0_rlc_reset(adev);
4167         gfx_v8_0_init_pg(adev);
4168
4169         if (!adev->pp_enabled) {
4170                 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4171                         /* legacy rlc firmware loading */
4172                         r = gfx_v8_0_rlc_load_microcode(adev);
4173                         if (r)
4174                                 return r;
4175                 } else {
4176                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4177                                                         AMDGPU_UCODE_ID_RLC_G);
4178                         if (r)
4179                                 return -EINVAL;
4180                 }
4181         }
4182
4183         gfx_v8_0_rlc_start(adev);
4184
4185         return 0;
4186 }
4187
4188 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4189 {
4190         int i;
4191         u32 tmp = RREG32(mmCP_ME_CNTL);
4192
4193         if (enable) {
4194                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4195                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4196                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4197         } else {
4198                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4199                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4200                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4201                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4202                         adev->gfx.gfx_ring[i].ready = false;
4203         }
4204         WREG32(mmCP_ME_CNTL, tmp);
4205         udelay(50);
4206 }
4207
4208 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4209 {
4210         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4211         const struct gfx_firmware_header_v1_0 *ce_hdr;
4212         const struct gfx_firmware_header_v1_0 *me_hdr;
4213         const __le32 *fw_data;
4214         unsigned i, fw_size;
4215
4216         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4217                 return -EINVAL;
4218
4219         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4220                 adev->gfx.pfp_fw->data;
4221         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4222                 adev->gfx.ce_fw->data;
4223         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4224                 adev->gfx.me_fw->data;
4225
4226         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4227         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4228         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4229
4230         gfx_v8_0_cp_gfx_enable(adev, false);
4231
4232         /* PFP */
4233         fw_data = (const __le32 *)
4234                 (adev->gfx.pfp_fw->data +
4235                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4236         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4237         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4238         for (i = 0; i < fw_size; i++)
4239                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4240         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4241
4242         /* CE */
4243         fw_data = (const __le32 *)
4244                 (adev->gfx.ce_fw->data +
4245                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4246         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4247         WREG32(mmCP_CE_UCODE_ADDR, 0);
4248         for (i = 0; i < fw_size; i++)
4249                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4250         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4251
4252         /* ME */
4253         fw_data = (const __le32 *)
4254                 (adev->gfx.me_fw->data +
4255                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4256         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4257         WREG32(mmCP_ME_RAM_WADDR, 0);
4258         for (i = 0; i < fw_size; i++)
4259                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4260         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4261
4262         return 0;
4263 }
4264
4265 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4266 {
4267         u32 count = 0;
4268         const struct cs_section_def *sect = NULL;
4269         const struct cs_extent_def *ext = NULL;
4270
4271         /* begin clear state */
4272         count += 2;
4273         /* context control state */
4274         count += 3;
4275
4276         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4277                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4278                         if (sect->id == SECT_CONTEXT)
4279                                 count += 2 + ext->reg_count;
4280                         else
4281                                 return 0;
4282                 }
4283         }
4284         /* pa_sc_raster_config/pa_sc_raster_config1 */
4285         count += 4;
4286         /* end clear state */
4287         count += 2;
4288         /* clear state */
4289         count += 2;
4290
4291         return count;
4292 }
4293
4294 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4295 {
4296         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4297         const struct cs_section_def *sect = NULL;
4298         const struct cs_extent_def *ext = NULL;
4299         int r, i;
4300
4301         /* init the CP */
4302         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4303         WREG32(mmCP_ENDIAN_SWAP, 0);
4304         WREG32(mmCP_DEVICE_ID, 1);
4305
4306         gfx_v8_0_cp_gfx_enable(adev, true);
4307
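        /* the +4 beyond the CSB size covers the trailing SET_BASE packet
         * (header + 3 dwords) that initializes the CE partitions below
         */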
4308         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4309         if (r) {
4310                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4311                 return r;
4312         }
4313
4314         /* clear state buffer */
4315         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4316         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4317
4318         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4319         amdgpu_ring_write(ring, 0x80000000);
4320         amdgpu_ring_write(ring, 0x80000000);
4321
4322         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4323                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4324                         if (sect->id == SECT_CONTEXT) {
4325                                 amdgpu_ring_write(ring,
4326                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4327                                                ext->reg_count));
4328                                 amdgpu_ring_write(ring,
4329                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4330                                 for (i = 0; i < ext->reg_count; i++)
4331                                         amdgpu_ring_write(ring, ext->extent[i]);
4332                         }
4333                 }
4334         }
4335
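        /* per-ASIC values for PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 */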
4336         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4337         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4338         switch (adev->asic_type) {
4339         case CHIP_TONGA:
4340         case CHIP_POLARIS10:
4341                 amdgpu_ring_write(ring, 0x16000012);
4342                 amdgpu_ring_write(ring, 0x0000002A);
4343                 break;
4344         case CHIP_POLARIS11:
4345         case CHIP_POLARIS12:
4346                 amdgpu_ring_write(ring, 0x16000012);
4347                 amdgpu_ring_write(ring, 0x00000000);
4348                 break;
4349         case CHIP_FIJI:
4350                 amdgpu_ring_write(ring, 0x3a00161a);
4351                 amdgpu_ring_write(ring, 0x0000002e);
4352                 break;
4353         case CHIP_CARRIZO:
4354                 amdgpu_ring_write(ring, 0x00000002);
4355                 amdgpu_ring_write(ring, 0x00000000);
4356                 break;
4357         case CHIP_TOPAZ:
4358                 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4359                                 0x00000000 : 0x00000002);
4360                 amdgpu_ring_write(ring, 0x00000000);
4361                 break;
4362         case CHIP_STONEY:
4363                 amdgpu_ring_write(ring, 0x00000000);
4364                 amdgpu_ring_write(ring, 0x00000000);
4365                 break;
4366         default:
4367                 BUG();
4368         }
4369
4370         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4371         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4372
4373         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4374         amdgpu_ring_write(ring, 0);
4375
4376         /* init the CE partitions */
4377         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4378         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4379         amdgpu_ring_write(ring, 0x8000);
4380         amdgpu_ring_write(ring, 0x8000);
4381
4382         amdgpu_ring_commit(ring);
4383
4384         return 0;
4385 }

4386 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4387 {
4388         u32 tmp;
4389         /* no gfx doorbells on iceland */
4390         if (adev->asic_type == CHIP_TOPAZ)
4391                 return;
4392
4393         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4394
4395         if (ring->use_doorbell) {
4396                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4397                                 DOORBELL_OFFSET, ring->doorbell_index);
4398                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4399                                                 DOORBELL_HIT, 0);
4400                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4401                                             DOORBELL_EN, 1);
4402         } else {
4403                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4404         }
4405
4406         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4407
4408         if (adev->flags & AMD_IS_APU)
4409                 return;
4410
4411         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4412                                         DOORBELL_RANGE_LOWER,
4413                                         AMDGPU_DOORBELL_GFX_RING0);
4414         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4415
4416         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4417                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4418 }
4419
4420 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4421 {
4422         struct amdgpu_ring *ring;
4423         u32 tmp;
4424         u32 rb_bufsz;
4425         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4426         int r;
4427
4428         /* Set the write pointer delay */
4429         WREG32(mmCP_RB_WPTR_DELAY, 0);
4430
4431         /* set the RB to use vmid 0 */
4432         WREG32(mmCP_RB_VMID, 0);
4433
4434         /* Set ring buffer size */
4435         ring = &adev->gfx.gfx_ring[0];
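        /* RB_BUFSZ encodes the ring size as log2(size_in_bytes / 8), i.e.
         * the RB holds (1 << (RB_BUFSZ + 1)) dwords (encoding assumed from
         * the CP_RB0_CNTL layout)
         */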
4436         rb_bufsz = order_base_2(ring->ring_size / 8);
4437         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4438         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4439         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4440         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4441 #ifdef __BIG_ENDIAN
4442         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4443 #endif
4444         WREG32(mmCP_RB0_CNTL, tmp);
4445
4446         /* Initialize the ring buffer's read and write pointers */
4447         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4448         ring->wptr = 0;
4449         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4450
4451         /* set the wb address whether it's enabled or not */
4452         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
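        /* the HI register holds only address bits [39:32], hence the 0xFF
         * mask on the upper half
         */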
4453         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4454         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4455
4456         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4457         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4458         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4459         mdelay(1);
4460         WREG32(mmCP_RB0_CNTL, tmp);
4461
4462         rb_addr = ring->gpu_addr >> 8;
4463         WREG32(mmCP_RB0_BASE, rb_addr);
4464         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4465
4466         gfx_v8_0_set_cpg_door_bell(adev, ring);
4467         /* start the ring */
4468         amdgpu_ring_clear_ring(ring);
4469         gfx_v8_0_cp_gfx_start(adev);
4470         ring->ready = true;
4471         r = amdgpu_ring_test_ring(ring);
4472         if (r)
4473                 ring->ready = false;
4474
4475         return r;
4476 }
4477
4478 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4479 {
4480         int i;
4481
4482         if (enable) {
4483                 WREG32(mmCP_MEC_CNTL, 0);
4484         } else {
4485                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4486                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4487                         adev->gfx.compute_ring[i].ready = false;
4488                 adev->gfx.kiq.ring.ready = false;
4489         }
4490         udelay(50);
4491 }
4492
4493 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4494 {
4495         const struct gfx_firmware_header_v1_0 *mec_hdr;
4496         const __le32 *fw_data;
4497         unsigned i, fw_size;
4498
4499         if (!adev->gfx.mec_fw)
4500                 return -EINVAL;
4501
4502         gfx_v8_0_cp_compute_enable(adev, false);
4503
4504         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4505         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4506
4507         fw_data = (const __le32 *)
4508                 (adev->gfx.mec_fw->data +
4509                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4510         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4511
4512         /* MEC1 */
4513         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4514         for (i = 0; i < fw_size; i++)
4515                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4516         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4517
4518         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4519         if (adev->gfx.mec2_fw) {
4520                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4521
4522                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4523                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4524
4525                 fw_data = (const __le32 *)
4526                         (adev->gfx.mec2_fw->data +
4527                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4528                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4529
4530                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4531                 for (i = 0; i < fw_size; i++)
4532                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4533                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4534         }
4535
4536         return 0;
4537 }
4538
4539 /* KIQ functions */
4540 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4541 {
4542         uint32_t tmp;
4543         struct amdgpu_device *adev = ring->adev;
4544
4545         /* tell the RLC which queue is the KIQ */
4546         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4547         tmp &= 0xffffff00;
4548         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4549         WREG32(mmRLC_CP_SCHEDULERS, tmp);
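        /* re-write with bit 7 set, which appears to act as a "valid"
         * strobe latching the new KIQ selection into the RLC
         */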
4550         tmp |= 0x80;
4551         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4552 }
4553
4554 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4555 {
4556         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4557         uint32_t scratch, tmp = 0;
4558         uint64_t queue_mask = 0;
4559         int r, i;
4560
4561         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4562                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4563                         continue;
4564
4565                 /* This situation may be hit in the future if a new HW
4566                  * generation exposes more than 64 queues. If so, the
4567                  * definition of queue_mask needs updating */
4568                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4569                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4570                         break;
4571                 }
4572
4573                 queue_mask |= (1ull << i);
4574         }
4575
4576         r = amdgpu_gfx_scratch_get(adev, &scratch);
4577         if (r) {
4578                 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4579                 return r;
4580         }
4581         WREG32(scratch, 0xCAFEDEAD);
4582
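        /* dword budget: SET_RESOURCES is 8, each MAP_QUEUES is 7 (rounded
         * up to 8 per ring here), plus 3 for the completion scratch write
         */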
4583         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4584         if (r) {
4585                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4586                 amdgpu_gfx_scratch_free(adev, scratch);
4587                 return r;
4588         }
4589         /* set resources */
4590         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4591         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4592         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4593         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4594         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4595         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4596         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4597         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4598         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4599                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4600                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4601                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4602
4603                 /* map queues */
4604                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4605                 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
4606                 amdgpu_ring_write(kiq_ring,
4607                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4608                 amdgpu_ring_write(kiq_ring,
4609                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4610                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4611                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4612                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4613                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4614                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4615                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4616                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4617         }
4618         /* write to scratch for completion */
4619         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4620         amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4621         amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4622         amdgpu_ring_commit(kiq_ring);
4623
4624         for (i = 0; i < adev->usec_timeout; i++) {
4625                 tmp = RREG32(scratch);
4626                 if (tmp == 0xDEADBEEF)
4627                         break;
4628                 DRM_UDELAY(1);
4629         }
4630         if (i >= adev->usec_timeout) {
4631                 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4632                           scratch, tmp);
4633                 r = -EINVAL;
4634         }
4635         amdgpu_gfx_scratch_free(adev, scratch);
4636
4637         return r;
4638 }
4639
4640 static int gfx_v8_0_kiq_kcq_disable(struct amdgpu_device *adev)
4641 {
4642         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4643         uint32_t scratch, tmp = 0;
4644         int r, i;
4645
4646         r = amdgpu_gfx_scratch_get(adev, &scratch);
4647         if (r) {
4648                 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4649                 return r;
4650         }
4651         WREG32(scratch, 0xCAFEDEAD);
4652
4653         r = amdgpu_ring_alloc(kiq_ring, 6 + 3);
4654         if (r) {
4655                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4656                 amdgpu_gfx_scratch_free(adev, scratch);
4657                 return r;
4658         }
4659         /* unmap queues */
4660         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4661         amdgpu_ring_write(kiq_ring,
4662                           PACKET3_UNMAP_QUEUES_ACTION(1)| /* RESET_QUEUES */
4663                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(2)); /* select all queues */
4664         amdgpu_ring_write(kiq_ring, 0);
4665         amdgpu_ring_write(kiq_ring, 0);
4666         amdgpu_ring_write(kiq_ring, 0);
4667         amdgpu_ring_write(kiq_ring, 0);
4668         /* write to scratch for completion */
4669         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4670         amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4671         amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4672         amdgpu_ring_commit(kiq_ring);
4673
4674         for (i = 0; i < adev->usec_timeout; i++) {
4675                 tmp = RREG32(scratch);
4676                 if (tmp == 0xDEADBEEF)
4677                         break;
4678                 DRM_UDELAY(1);
4679         }
4680         if (i >= adev->usec_timeout) {
4681                 DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n",
4682                           scratch, tmp);
4683                 r = -EINVAL;
4684         }
4685         amdgpu_gfx_scratch_free(adev, scratch);
4686
4687         return r;
4688 }
4689
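/* Ask the CP to dequeue the HQD currently selected via SRBM and wait for it
 * to go inactive.  @req is the CP_HQD_DEQUEUE_REQUEST mode; callers here use
 * 1 on queue init and 2 around soft reset (drain vs. reset semantics assumed
 * from the VI HQD model).  Callers hold srbm_mutex with the target queue
 * selected.
 */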
4690 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4691 {
4692         int i, r = 0;
4693
4694         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4695                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4696                 for (i = 0; i < adev->usec_timeout; i++) {
4697                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4698                                 break;
4699                         udelay(1);
4700                 }
4701                 if (i == adev->usec_timeout)
4702                         r = -ETIMEDOUT;
4703         }
4704         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4705         WREG32(mmCP_HQD_PQ_RPTR, 0);
4706         WREG32(mmCP_HQD_PQ_WPTR, 0);
4707
4708         return r;
4709 }
4710
4711 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4712 {
4713         struct amdgpu_device *adev = ring->adev;
4714         struct vi_mqd *mqd = ring->mqd_ptr;
4715         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4716         uint32_t tmp;
4717
4718         /* init the mqd struct */
4719         memset(mqd, 0, sizeof(struct vi_mqd));
4720
4721         mqd->header = 0xC0310800;
4722         mqd->compute_pipelinestat_enable = 0x00000001;
4723         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4724         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4725         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4726         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4727         mqd->compute_misc_reserved = 0x00000003;
4728
4729         eop_base_addr = ring->eop_gpu_addr >> 8;
4730         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4731         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4732
4733         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4734         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4735         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4736                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4737
4738         mqd->cp_hqd_eop_control = tmp;
4739
4740         /* enable doorbell? */
4741         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4742                             CP_HQD_PQ_DOORBELL_CONTROL,
4743                             DOORBELL_EN,
4744                             ring->use_doorbell ? 1 : 0);
4745
4746         mqd->cp_hqd_pq_doorbell_control = tmp;
4747
4748         /* set the pointer to the MQD */
4749         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4750         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4751
4752         /* set MQD vmid to 0 */
4753         tmp = RREG32(mmCP_MQD_CONTROL);
4754         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4755         mqd->cp_mqd_control = tmp;
4756
4757         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4758         hqd_gpu_addr = ring->gpu_addr >> 8;
4759         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4760         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4761
4762         /* set up the HQD, this is similar to CP_RB0_CNTL */
4763         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4764         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4765                             (order_base_2(ring->ring_size / 4) - 1));
4766         /* REG_SET_FIELD applies the field shift itself, so pass the raw
4767          * value; pre-shifting by 8 would fall outside the 6-bit field */
4768         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4769                         order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1);
4768 #ifdef __BIG_ENDIAN
4769         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4770 #endif
4771         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4772         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4773         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4774         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4775         mqd->cp_hqd_pq_control = tmp;
4776
4777         /* set the wb address whether it's enabled or not */
4778         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4779         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4780         mqd->cp_hqd_pq_rptr_report_addr_hi =
4781                 upper_32_bits(wb_gpu_addr) & 0xffff;
4782
4783         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4784         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4785         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4786         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4787
4788         tmp = 0;
4789         /* enable the doorbell if requested */
4790         if (ring->use_doorbell) {
4791                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4792                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4793                                 DOORBELL_OFFSET, ring->doorbell_index);
4794
4795                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4796                                          DOORBELL_EN, 1);
4797                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4798                                          DOORBELL_SOURCE, 0);
4799                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4800                                          DOORBELL_HIT, 0);
4801         }
4802
4803         mqd->cp_hqd_pq_doorbell_control = tmp;
4804
4805         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4806         ring->wptr = 0;
4807         mqd->cp_hqd_pq_wptr = ring->wptr;
4808         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4809
4810         /* set the vmid for the queue */
4811         mqd->cp_hqd_vmid = 0;
4812
4813         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4814         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4815         mqd->cp_hqd_persistent_state = tmp;
4816
4817         /* set MTYPE */
4818         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4819         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4820         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4821         mqd->cp_hqd_ib_control = tmp;
4822
4823         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4824         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4825         mqd->cp_hqd_iq_timer = tmp;
4826
4827         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4828         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4829         mqd->cp_hqd_ctx_save_control = tmp;
4830
4831         /* defaults */
4832         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4833         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4834         mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4835         mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4836         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4837         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4838         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4839         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4840         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4841         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4842         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4843         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4844         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4845         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4846         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4847
4848         /* activate the queue */
4849         mqd->cp_hqd_active = 1;
4850
4851         return 0;
4852 }
4853
4854 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4855                         struct vi_mqd *mqd)
4856 {
4857         uint32_t mqd_reg;
4858         uint32_t *mqd_data;
4859
4860         /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4861         mqd_data = &mqd->cp_mqd_base_addr_lo;
4862
4863         /* disable wptr polling */
4864         WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4865
4866         /* program all HQD registers */
4867         for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4868                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4869
4870         /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4871          * This is safe since EOP RPTR==WPTR for any inactive HQD
4872          * on ASICs that do not support context-save.
4873          * EOP writes/reads can start anywhere in the ring.
4874          */
4875         if (adev->asic_type != CHIP_TONGA) {
4876                 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4877                 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4878                 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4879         }
4880
4881         for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4882                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4883
4884         /* activate the HQD */
4885         for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4886                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4887
4888         return 0;
4889 }
4890
4891 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4892 {
4893         int r = 0;
4894         struct amdgpu_device *adev = ring->adev;
4895         struct vi_mqd *mqd = ring->mqd_ptr;
4896         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4897
4898         gfx_v8_0_kiq_setting(ring);
4899
4900         if (adev->gfx.in_reset) { /* for GPU_RESET case */
4901                 /* reset MQD to a clean status */
4902                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4903                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4904
4905                 /* reset ring buffer */
4906                 ring->wptr = 0;
4907                 amdgpu_ring_clear_ring(ring);
4908                 mutex_lock(&adev->srbm_mutex);
4909                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4910                 r = gfx_v8_0_deactivate_hqd(adev, 1);
4911                 if (r) {
4912                         dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
4913                         goto out_unlock;
4914                 }
4915                 gfx_v8_0_mqd_commit(adev, mqd);
4916                 vi_srbm_select(adev, 0, 0, 0, 0);
4917                 mutex_unlock(&adev->srbm_mutex);
4918         } else {
4919                 mutex_lock(&adev->srbm_mutex);
4920                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4921                 gfx_v8_0_mqd_init(ring);
4922                 r = gfx_v8_0_deactivate_hqd(adev, 1);
4923                 if (r) {
4924                         dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
4925                         goto out_unlock;
4926                 }
4927                 gfx_v8_0_mqd_commit(adev, mqd);
4928                 vi_srbm_select(adev, 0, 0, 0, 0);
4929                 mutex_unlock(&adev->srbm_mutex);
4930
4931                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4932                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4933         }
4934
4935         return r;
4936
4937 out_unlock:
4938         vi_srbm_select(adev, 0, 0, 0, 0);
4939         mutex_unlock(&adev->srbm_mutex);
4940         return r;
4941 }
4942
4943 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4944 {
4945         struct amdgpu_device *adev = ring->adev;
4946         struct vi_mqd *mqd = ring->mqd_ptr;
4947         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4948
4949         if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
4950                 mutex_lock(&adev->srbm_mutex);
4951                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4952                 gfx_v8_0_mqd_init(ring);
4953                 vi_srbm_select(adev, 0, 0, 0, 0);
4954                 mutex_unlock(&adev->srbm_mutex);
4955
4956                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4957                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4958         } else if (adev->gfx.in_reset) { /* for GPU_RESET case */
4959                 /* reset MQD to a clean status */
4960                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4961                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4962                 /* reset ring buffer */
4963                 ring->wptr = 0;
4964                 amdgpu_ring_clear_ring(ring);
4965         } else {
4966                 amdgpu_ring_clear_ring(ring);
4967         }
4968         return 0;
4969 }
4970
4971 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4972 {
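        /* asic_type enum order stands in for generation here: Tonga and
         * older are assumed to lack the MEC doorbell range registers
         */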
4973         if (adev->asic_type > CHIP_TONGA) {
4974                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4975                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4976         }
4977         /* enable doorbells */
4978         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4979 }
4980
4981 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4982 {
4983         struct amdgpu_ring *ring = NULL;
4984         int r = 0, i;
4985
4986         gfx_v8_0_cp_compute_enable(adev, true);
4987
4988         ring = &adev->gfx.kiq.ring;
4989
4990         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4991         if (unlikely(r != 0))
4992                 goto done;
4993
4994         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4995         if (!r) {
4996                 r = gfx_v8_0_kiq_init_queue(ring);
4997                 amdgpu_bo_kunmap(ring->mqd_obj);
4998                 ring->mqd_ptr = NULL;
4999         }
5000         amdgpu_bo_unreserve(ring->mqd_obj);
5001         if (r)
5002                 goto done;
5003
5004         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5005                 ring = &adev->gfx.compute_ring[i];
5006
5007                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
5008                 if (unlikely(r != 0))
5009                         goto done;
5010                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
5011                 if (!r) {
5012                         r = gfx_v8_0_kcq_init_queue(ring);
5013                         amdgpu_bo_kunmap(ring->mqd_obj);
5014                         ring->mqd_ptr = NULL;
5015                 }
5016                 amdgpu_bo_unreserve(ring->mqd_obj);
5017                 if (r)
5018                         goto done;
5019         }
5020
5021         gfx_v8_0_set_mec_doorbell_range(adev);
5022
5023         r = gfx_v8_0_kiq_kcq_enable(adev);
5024         if (r)
5025                 goto done;
5026
5027         /* Test KIQ */
5028         ring = &adev->gfx.kiq.ring;
5029         ring->ready = true;
5030         r = amdgpu_ring_test_ring(ring);
5031         if (r) {
5032                 ring->ready = false;
5033                 goto done;
5034         }
5035
5036         /* Test KCQs */
5037         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5038                 ring = &adev->gfx.compute_ring[i];
5039                 ring->ready = true;
5040                 r = amdgpu_ring_test_ring(ring);
5041                 if (r)
5042                         ring->ready = false;
5043         }
5044
5045 done:
5046         return r;
5047 }
5048
5049 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5050 {
5051         int r;
5052
5053         if (!(adev->flags & AMD_IS_APU))
5054                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5055
5056         if (!adev->pp_enabled) {
5057                 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
5058                         /* legacy firmware loading */
5059                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
5060                         if (r)
5061                                 return r;
5062
5063                         r = gfx_v8_0_cp_compute_load_microcode(adev);
5064                         if (r)
5065                                 return r;
5066                 } else {
5067                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5068                                                         AMDGPU_UCODE_ID_CP_CE);
5069                         if (r)
5070                                 return -EINVAL;
5071
5072                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5073                                                         AMDGPU_UCODE_ID_CP_PFP);
5074                         if (r)
5075                                 return -EINVAL;
5076
5077                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5078                                                         AMDGPU_UCODE_ID_CP_ME);
5079                         if (r)
5080                                 return -EINVAL;
5081
5082                         if (adev->asic_type == CHIP_TOPAZ) {
5083                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5084                                 if (r)
5085                                         return r;
5086                         } else {
5087                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5088                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
5089                                 if (r)
5090                                         return -EINVAL;
5091                         }
5092                 }
5093         }
5094
5095         r = gfx_v8_0_cp_gfx_resume(adev);
5096         if (r)
5097                 return r;
5098
5099         r = gfx_v8_0_kiq_resume(adev);
5100         if (r)
5101                 return r;
5102
5103         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5104
5105         return 0;
5106 }
5107
5108 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5109 {
5110         gfx_v8_0_cp_gfx_enable(adev, enable);
5111         gfx_v8_0_cp_compute_enable(adev, enable);
5112 }
5113
5114 static int gfx_v8_0_hw_init(void *handle)
5115 {
5116         int r;
5117         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5118
5119         gfx_v8_0_init_golden_registers(adev);
5120         gfx_v8_0_gpu_init(adev);
5121
5122         r = gfx_v8_0_rlc_resume(adev);
5123         if (r)
5124                 return r;
5125
5126         r = gfx_v8_0_cp_resume(adev);
5127
5128         return r;
5129 }
5130
5131 static int gfx_v8_0_hw_fini(void *handle)
5132 {
5133         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5134
5135         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5136         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5137         if (amdgpu_sriov_vf(adev)) {
5138                 pr_debug("For SRIOV client, nothing to do here\n");
5139                 return 0;
5140         }
5141         gfx_v8_0_kiq_kcq_disable(adev);
5142         gfx_v8_0_cp_enable(adev, false);
5143         gfx_v8_0_rlc_stop(adev);
5144
5145         amdgpu_set_powergating_state(adev,
5146                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5147
5148         return 0;
5149 }
5150
5151 static int gfx_v8_0_suspend(void *handle)
5152 {
5153         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5154         adev->gfx.in_suspend = true;
5155         return gfx_v8_0_hw_fini(adev);
5156 }
5157
5158 static int gfx_v8_0_resume(void *handle)
5159 {
5160         int r;
5161         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5162
5163         r = gfx_v8_0_hw_init(adev);
5164         adev->gfx.in_suspend = false;
5165         return r;
5166 }
5167
5168 static bool gfx_v8_0_is_idle(void *handle)
5169 {
5170         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5171
5172         return !REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE);
5176 }
5177
5178 static int gfx_v8_0_wait_for_idle(void *handle)
5179 {
5180         unsigned i;
5181         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5182
5183         for (i = 0; i < adev->usec_timeout; i++) {
5184                 if (gfx_v8_0_is_idle(handle))
5185                         return 0;
5186
5187                 udelay(1);
5188         }
5189         return -ETIMEDOUT;
5190 }
5191
5192 static bool gfx_v8_0_check_soft_reset(void *handle)
5193 {
5194         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5195         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5196         u32 tmp;
5197
5198         /* GRBM_STATUS */
5199         tmp = RREG32(mmGRBM_STATUS);
5200         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5201                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5202                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5203                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5204                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5205                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5206                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5207                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5208                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5209                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5210                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5211                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5212                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5213         }
5214
5215         /* GRBM_STATUS2 */
5216         tmp = RREG32(mmGRBM_STATUS2);
5217         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5218                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5219                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5220
5221         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5222             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5223             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5224                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5225                                                 SOFT_RESET_CPF, 1);
5226                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5227                                                 SOFT_RESET_CPC, 1);
5228                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5229                                                 SOFT_RESET_CPG, 1);
5230                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5231                                                 SOFT_RESET_GRBM, 1);
5232         }
5233
5234         /* SRBM_STATUS */
5235         tmp = RREG32(mmSRBM_STATUS);
5236         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5237                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5238                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5239         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5240                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5241                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5242
5243         if (grbm_soft_reset || srbm_soft_reset) {
5244                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5245                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5246                 return true;
5247         } else {
5248                 adev->gfx.grbm_soft_reset = 0;
5249                 adev->gfx.srbm_soft_reset = 0;
5250                 return false;
5251         }
5252 }
5253
5254 static int gfx_v8_0_pre_soft_reset(void *handle)
5255 {
5256         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5257         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5258
5259         if ((!adev->gfx.grbm_soft_reset) &&
5260             (!adev->gfx.srbm_soft_reset))
5261                 return 0;
5262
5263         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5264         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5265
5266         /* stop the rlc */
5267         gfx_v8_0_rlc_stop(adev);
5268
5269         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5270             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5271                 /* Disable GFX parsing/prefetching */
5272                 gfx_v8_0_cp_gfx_enable(adev, false);
5273
5274         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5275             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5276             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5277             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5278                 int i;
5279
5280                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5281                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5282
5283                         mutex_lock(&adev->srbm_mutex);
5284                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5285                         gfx_v8_0_deactivate_hqd(adev, 2);
5286                         vi_srbm_select(adev, 0, 0, 0, 0);
5287                         mutex_unlock(&adev->srbm_mutex);
5288                 }
5289                 /* Disable MEC parsing/prefetching */
5290                 gfx_v8_0_cp_compute_enable(adev, false);
5291         }
5292
5293         return 0;
5294 }
5295
5296 static int gfx_v8_0_soft_reset(void *handle)
5297 {
5298         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5299         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5300         u32 tmp;
5301
5302         if ((!adev->gfx.grbm_soft_reset) &&
5303             (!adev->gfx.srbm_soft_reset))
5304                 return 0;
5305
5306         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5307         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5308
5309         if (grbm_soft_reset || srbm_soft_reset) {
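                /* stall GFX traffic in the memory hub and clear its state
                 * while the reset is asserted; the mirror write below
                 * releases the stall (role of GMCON_DEBUG assumed from
                 * that paired sequence)
                 */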
5310                 tmp = RREG32(mmGMCON_DEBUG);
5311                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5312                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5313                 WREG32(mmGMCON_DEBUG, tmp);
5314                 udelay(50);
5315         }
5316
5317         if (grbm_soft_reset) {
5318                 tmp = RREG32(mmGRBM_SOFT_RESET);
5319                 tmp |= grbm_soft_reset;
5320                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5321                 WREG32(mmGRBM_SOFT_RESET, tmp);
5322                 tmp = RREG32(mmGRBM_SOFT_RESET);
5323
5324                 udelay(50);
5325
5326                 tmp &= ~grbm_soft_reset;
5327                 WREG32(mmGRBM_SOFT_RESET, tmp);
5328                 tmp = RREG32(mmGRBM_SOFT_RESET);
5329         }
5330
5331         if (srbm_soft_reset) {
5332                 tmp = RREG32(mmSRBM_SOFT_RESET);
5333                 tmp |= srbm_soft_reset;
5334                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5335                 WREG32(mmSRBM_SOFT_RESET, tmp);
5336                 tmp = RREG32(mmSRBM_SOFT_RESET);
5337
5338                 udelay(50);
5339
5340                 tmp &= ~srbm_soft_reset;
5341                 WREG32(mmSRBM_SOFT_RESET, tmp);
5342                 tmp = RREG32(mmSRBM_SOFT_RESET);
5343         }
5344
5345         if (grbm_soft_reset || srbm_soft_reset) {
5346                 tmp = RREG32(mmGMCON_DEBUG);
5347                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5348                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5349                 WREG32(mmGMCON_DEBUG, tmp);
5350         }
5351
5352         /* Wait a little for things to settle down */
5353         udelay(50);
5354
5355         return 0;
5356 }
5357
5358 static int gfx_v8_0_post_soft_reset(void *handle)
5359 {
5360         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5361         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5362
5363         if ((!adev->gfx.grbm_soft_reset) &&
5364             (!adev->gfx.srbm_soft_reset))
5365                 return 0;
5366
5367         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5368         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5369
5370         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5371             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5372                 gfx_v8_0_cp_gfx_resume(adev);
5373
5374         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5375             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5376             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5377             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5378                 int i;
5379
5380                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5381                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5382
5383                         mutex_lock(&adev->srbm_mutex);
5384                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5385                         gfx_v8_0_deactivate_hqd(adev, 2);
5386                         vi_srbm_select(adev, 0, 0, 0, 0);
5387                         mutex_unlock(&adev->srbm_mutex);
5388                 }
5389                 gfx_v8_0_kiq_resume(adev);
5390         }
5391         gfx_v8_0_rlc_start(adev);
5392
5393         return 0;
5394 }
5395
5396 /**
5397  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5398  *
5399  * @adev: amdgpu_device pointer
5400  *
5401  * Fetches a GPU clock counter snapshot.
5402  * Returns the 64-bit clock counter snapshot.
5403  */
5404 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5405 {
5406         uint64_t clock;
5407
5408         mutex_lock(&adev->gfx.gpu_clock_mutex);
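        /* the capture write latches the 64-bit counter so the two 32-bit
         * halves below read back a consistent snapshot; the mutex keeps
         * concurrent captures from racing on the latch
         */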
5409         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5410         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5411                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5412         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5413         return clock;
5414 }
5415
5416 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5417                                           uint32_t vmid,
5418                                           uint32_t gds_base, uint32_t gds_size,
5419                                           uint32_t gws_base, uint32_t gws_size,
5420                                           uint32_t oa_base, uint32_t oa_size)
5421 {
5422         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5423         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5424
5425         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5426         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5427
5428         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5429         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5430
5431         /* GDS Base */
5432         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5433         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5434                                 WRITE_DATA_DST_SEL(0)));
5435         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5436         amdgpu_ring_write(ring, 0);
5437         amdgpu_ring_write(ring, gds_base);
5438
5439         /* GDS Size */
5440         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5441         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5442                                 WRITE_DATA_DST_SEL(0)));
5443         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5444         amdgpu_ring_write(ring, 0);
5445         amdgpu_ring_write(ring, gds_size);
5446
5447         /* GWS */
5448         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5449         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5450                                 WRITE_DATA_DST_SEL(0)));
5451         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5452         amdgpu_ring_write(ring, 0);
5453         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5454
5455         /* OA */
5456         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5457         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5458                                 WRITE_DATA_DST_SEL(0)));
5459         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5460         amdgpu_ring_write(ring, 0);
5461         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5462 }
5463
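/* Read one SQ indirect register of the given SIMD/wave through the
 * SQ_IND_INDEX/SQ_IND_DATA register pair.
 */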
5464 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5465 {
5466         WREG32(mmSQ_IND_INDEX,
5467                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5468                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5469                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5470                 (SQ_IND_INDEX__FORCE_READ_MASK));
5471         return RREG32(mmSQ_IND_DATA);
5472 }
5473
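/* Bulk-read 'num' consecutive wave registers starting at 'regno' for one
 * thread; AUTO_INCR advances the index after each SQ_IND_DATA read.
 */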
5474 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5475                            uint32_t wave, uint32_t thread,
5476                            uint32_t regno, uint32_t num, uint32_t *out)
5477 {
5478         WREG32(mmSQ_IND_INDEX,
5479                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5480                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5481                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5482                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5483                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5484                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5485         while (num--)
5486                 *(out++) = RREG32(mmSQ_IND_DATA);
5487 }
5488
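/* Snapshot the status, PC, EXEC mask and allocation registers of a single
 * wave; exposed as .read_wave_data in gfx_v8_0_gfx_funcs below.
 */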
5489 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5490 {
5491         /* type 0 wave data */
5492         dst[(*no_fields)++] = 0;
5493         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5494         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5495         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5496         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5497         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5498         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5499         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5500         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5501         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5502         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5503         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5504         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5505         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5506         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5507         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5508         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5509         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5510         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5511 }
5512
5513 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5514                                      uint32_t wave, uint32_t start,
5515                                      uint32_t size, uint32_t *dst)
5516 {
5517         wave_read_regs(
5518                 adev, simd, wave, 0,
5519                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5520 }
5521
5522
5523 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5524         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5525         .select_se_sh = &gfx_v8_0_select_se_sh,
5526         .read_wave_data = &gfx_v8_0_read_wave_data,
5527         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5528 };
5529
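/* Early init: record the ring counts and install the gfx, ring, irq, GDS
 * and RLC function tables before the other init phases run.
 */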
5530 static int gfx_v8_0_early_init(void *handle)
5531 {
5532         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5533
5534         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5535         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5536         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5537         gfx_v8_0_set_ring_funcs(adev);
5538         gfx_v8_0_set_irq_funcs(adev);
5539         gfx_v8_0_set_gds_init(adev);
5540         gfx_v8_0_set_rlc_funcs(adev);
5541
5542         return 0;
5543 }
5544
5545 static int gfx_v8_0_late_init(void *handle)
5546 {
5547         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5548         int r;
5549
5550         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5551         if (r)
5552                 return r;
5553
5554         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5555         if (r)
5556                 return r;
5557
5558         /* requires IBs so do in late init after IB pool is initialized */
5559         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5560         if (r)
5561                 return r;
5562
5563         amdgpu_set_powergating_state(adev,
5564                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5565
5566         return 0;
5567 }
5568
5569 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5570                                                        bool enable)
5571 {
5572         if ((adev->asic_type == CHIP_POLARIS11) ||
5573             (adev->asic_type == CHIP_POLARIS12))
5574                 /* Send msg to SMU via Powerplay */
5575                 amdgpu_set_powergating_state(adev,
5576                                              AMD_IP_BLOCK_TYPE_SMC,
5577                                              enable ?
5578                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5579
5580         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5581 }
5582
5583 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5584                                                         bool enable)
5585 {
5586         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5587 }
5588
5589 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5590                 bool enable)
5591 {
5592         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5593 }
5594
5595 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5596                                           bool enable)
5597 {
5598         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5599 }
5600
5601 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5602                                                 bool enable)
5603 {
5604         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5605
5606         /* Read any GFX register to wake up GFX. */
5607         if (!enable)
5608                 RREG32(mmDB_RENDER_CONTROL);
5609 }
5610
5611 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5612                                           bool enable)
5613 {
5614         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5615                 cz_enable_gfx_cg_power_gating(adev, true);
5616                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5617                         cz_enable_gfx_pipeline_power_gating(adev, true);
5618         } else {
5619                 cz_enable_gfx_cg_power_gating(adev, false);
5620                 cz_enable_gfx_pipeline_power_gating(adev, false);
5621         }
5622 }
5623
5624 static int gfx_v8_0_set_powergating_state(void *handle,
5625                                           enum amd_powergating_state state)
5626 {
5627         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5628         bool enable = (state == AMD_PG_STATE_GATE);
5629
5630         if (amdgpu_sriov_vf(adev))
5631                 return 0;
5632
5633         switch (adev->asic_type) {
5634         case CHIP_CARRIZO:
5635         case CHIP_STONEY:
5636
5637                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5638                         cz_enable_sck_slow_down_on_power_up(adev, true);
5639                         cz_enable_sck_slow_down_on_power_down(adev, true);
5640                 } else {
5641                         cz_enable_sck_slow_down_on_power_up(adev, false);
5642                         cz_enable_sck_slow_down_on_power_down(adev, false);
5643                 }
5644                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5645                         cz_enable_cp_power_gating(adev, true);
5646                 else
5647                         cz_enable_cp_power_gating(adev, false);
5648
5649                 cz_update_gfx_cg_power_gating(adev, enable);
5650
5651                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5652                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5653                 else
5654                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5655
5656                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5657                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5658                 else
5659                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5660                 break;
5661         case CHIP_POLARIS11:
5662         case CHIP_POLARIS12:
5663                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5664                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5665                 else
5666                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5667
5668                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5669                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5670                 else
5671                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5672
5673                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5674                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5675                 else
5676                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5677                 break;
5678         default:
5679                 break;
5680         }
5681
5682         return 0;
5683 }
5684
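/* Report the clockgating features that are currently active by inspecting
 * the override/enable bits in the RLC, CGTS and CP registers.
 */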
5685 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5686 {
5687         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5688         u32 data;
5689
5690         if (amdgpu_sriov_vf(adev))
5691                 *flags = 0;
5692
5693         /* AMD_CG_SUPPORT_GFX_MGCG */
5694         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5695         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5696                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5697
5698         /* AMD_CG_SUPPORT_GFX_CGCG */
5699         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5700         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5701                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5702
5703         /* AMD_CG_SUPPORT_GFX_CGLS */
5704         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5705                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5706
5707         /* AMD_CG_SUPPORT_GFX_CGTS */
5708         data = RREG32(mmCGTS_SM_CTRL_REG);
5709         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5710                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5711
5712         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5713         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5714                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5715
5716         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5717         data = RREG32(mmRLC_MEM_SLP_CNTL);
5718         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5719                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5720
5721         /* AMD_CG_SUPPORT_GFX_CP_LS */
5722         data = RREG32(mmCP_MEM_SLP_CNTL);
5723         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5724                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5725 }
5726
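/* Broadcast a BPM serdes command: select all SEs/SHs, address every CU and
 * non-CU master, then issue 'cmd' for 'reg_addr' via RLC_SERDES_WR_CTRL.
 */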
5727 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5728                                      uint32_t reg_addr, uint32_t cmd)
5729 {
5730         uint32_t data;
5731
5732         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5733
5734         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5735         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5736
5737         data = RREG32(mmRLC_SERDES_WR_CTRL);
5738         if (adev->asic_type == CHIP_STONEY)
5739                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5740                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5741                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5742                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5743                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5744                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5745                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5746                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5747                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5748         else
5749                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5750                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5751                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5752                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5753                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5754                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5755                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5756                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5757                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5758                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5759                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5760         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5761                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5762                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5763                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5764
5765         WREG32(mmRLC_SERDES_WR_CTRL, data);
5766 }
5767
5768 #define MSG_ENTER_RLC_SAFE_MODE     1
5769 #define MSG_EXIT_RLC_SAFE_MODE      0
5770 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5771 #define RLC_GPR_REG2__REQ__SHIFT 0
5772 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5773 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5774
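/* Put the RLC into safe mode before the CG registers are touched: request
 * it through RLC_SAFE_MODE (CMD plus message 1), wait for the GFX
 * clock/power status bits, then wait for the RLC to ack by clearing CMD.
 */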
5775 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5776 {
5777         u32 data;
5778         unsigned i;
5779
5780         data = RREG32(mmRLC_CNTL);
5781         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5782                 return;
5783
5784         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5785                 data |= RLC_SAFE_MODE__CMD_MASK;
5786                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5787                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5788                 WREG32(mmRLC_SAFE_MODE, data);
5789
5790                 for (i = 0; i < adev->usec_timeout; i++) {
5791                         if ((RREG32(mmRLC_GPM_STAT) &
5792                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5793                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5794                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5795                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5796                                 break;
5797                         udelay(1);
5798                 }
5799
5800                 for (i = 0; i < adev->usec_timeout; i++) {
5801                         if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5802                                 break;
5803                         udelay(1);
5804                 }
5805                 adev->gfx.rlc.in_safe_mode = true;
5806         }
5807 }
5808
5809 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5810 {
5811         u32 data = 0;
5812         unsigned i;
5813
5814         data = RREG32(mmRLC_CNTL);
5815         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5816                 return;
5817
5818         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5819                 if (adev->gfx.rlc.in_safe_mode) {
5820                         data |= RLC_SAFE_MODE__CMD_MASK;
5821                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5822                         WREG32(mmRLC_SAFE_MODE, data);
5823                         adev->gfx.rlc.in_safe_mode = false;
5824                 }
5825         }
5826
5827         for (i = 0; i < adev->usec_timeout; i++) {
5828                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5829                         break;
5830                 udelay(1);
5831         }
5832 }
5833
5834 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5835         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5836         .exit_safe_mode = iceland_exit_rlc_safe_mode
5837 };
5838
5839 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5840                                                       bool enable)
5841 {
5842         uint32_t temp, data;
5843
5844         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5845
5846         /* It is disabled by HW by default */
5847         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5848                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5849                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5850                                 /* 1 - RLC memory Light sleep */
5851                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5852
5853                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5854                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5855                 }
5856
5857                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5858                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5859                 if (adev->flags & AMD_IS_APU)
5860                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5861                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5862                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5863                 else
5864                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5865                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5866                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5867                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5868
5869                 if (temp != data)
5870                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5871
5872                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5873                 gfx_v8_0_wait_for_rlc_serdes(adev);
5874
5875                 /* 5 - clear mgcg override */
5876                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5877
5878                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5879                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5880                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5881                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5882                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5883                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5884                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5885                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5886                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5887                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5888                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5889                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5890                         if (temp != data)
5891                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5892                 }
5893                 udelay(50);
5894
5895                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5896                 gfx_v8_0_wait_for_rlc_serdes(adev);
5897         } else {
5898                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5899                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5900                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5901                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5902                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5903                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5904                 if (temp != data)
5905                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5906
5907                 /* 2 - disable MGLS in RLC */
5908                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5909                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5910                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5911                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5912                 }
5913
5914                 /* 3 - disable MGLS in CP */
5915                 data = RREG32(mmCP_MEM_SLP_CNTL);
5916                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5917                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5918                         WREG32(mmCP_MEM_SLP_CNTL, data);
5919                 }
5920
5921                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5922                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5923                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5924                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5925                 if (temp != data)
5926                         WREG32(mmCGTS_SM_CTRL_REG, data);
5927
5928                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5929                 gfx_v8_0_wait_for_rlc_serdes(adev);
5930
5931                 /* 6 - set mgcg override */
5932                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5933
5934                 udelay(50);
5935
5936                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5937                 gfx_v8_0_wait_for_rlc_serdes(adev);
5938         }
5939
5940         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5941 }
5942
5943 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5944                                                       bool enable)
5945 {
5946         uint32_t temp, temp1, data, data1;
5947
5948         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5949
5950         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5951
5952         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5953                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5954                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5955                 if (temp1 != data1)
5956                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5957
5958                 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5959                 gfx_v8_0_wait_for_rlc_serdes(adev);
5960
5961                 /* 2 - clear cgcg override */
5962                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5963
5964                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5965                 gfx_v8_0_wait_for_rlc_serdes(adev);
5966
5967                 /* 3 - write cmd to set CGLS */
5968                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5969
5970                 /* 4 - enable cgcg */
5971                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5972
5973                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5974                         /* enable cgls */
5975                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5976
5977                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5978                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5979
5980                         if (temp1 != data1)
5981                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5982                 } else {
5983                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5984                 }
5985
5986                 if (temp != data)
5987                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5988
5989                 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5990                  * Cmp_busy/GFX_Idle interrupts
5991                  */
5992                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5993         } else {
5994                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5995                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5996
5997                 /* TEST CGCG */
5998                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5999                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
6000                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
6001                 if (temp1 != data1)
6002                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6003
6004                 /* read gfx register to wake up cgcg */
6005                 RREG32(mmCB_CGTT_SCLK_CTRL);
6006                 RREG32(mmCB_CGTT_SCLK_CTRL);
6007                 RREG32(mmCB_CGTT_SCLK_CTRL);
6008                 RREG32(mmCB_CGTT_SCLK_CTRL);
6009
6010                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6011                 gfx_v8_0_wait_for_rlc_serdes(adev);
6012
6013                 /* write cmd to Set CGCG Override */
6014                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6015
6016                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6017                 gfx_v8_0_wait_for_rlc_serdes(adev);
6018
6019                 /* write cmd to Clear CGLS */
6020                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6021
6022                 /* disable cgcg, cgls should be disabled too. */
6023                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
6024                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6025                 if (temp != data)
6026                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6027                 /* enable interrupts again for PG */
6028                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6029         }
6030
6031         gfx_v8_0_wait_for_rlc_serdes(adev);
6032
6033         adev->gfx.rlc.funcs->exit_safe_mode(adev);
6034 }
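
/* Toggle MGCG/MGLS and CGCG/CGLS as a set, preserving the required order:
 * coarse grain gating is enabled after medium grain and disabled before it.
 */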
6035 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6036                                             bool enable)
6037 {
6038         if (enable) {
6039                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6040                  * ===  MGCG + MGLS + TS(CG/LS) ===
6041                  */
6042                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6043                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6044         } else {
6045                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6046                  * ===  CGCG + CGLS ===
6047                  */
6048                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6049                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6050         }
6051         return 0;
6052 }
6053
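/* On Tonga the clockgating state is owned by the SMU: translate cg_flags
 * into PP_CG_MSG_ID messages and hand them to powerplay instead of writing
 * the CG registers directly.
 */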
6054 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6055                                           enum amd_clockgating_state state)
6056 {
6057         uint32_t msg_id, pp_state = 0;
6058         uint32_t pp_support_state = 0;
6059         void *pp_handle = adev->powerplay.pp_handle;
6060
6061         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6062                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6063                         pp_support_state = PP_STATE_SUPPORT_LS;
6064                         pp_state = PP_STATE_LS;
6065                 }
6066                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6067                         pp_support_state |= PP_STATE_SUPPORT_CG;
6068                         pp_state |= PP_STATE_CG;
6069                 }
6070                 if (state == AMD_CG_STATE_UNGATE)
6071                         pp_state = 0;
6072
6073                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6074                                 PP_BLOCK_GFX_CG,
6075                                 pp_support_state,
6076                                 pp_state);
6077                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6078         }
6079
6080         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6081                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6082                         pp_support_state = PP_STATE_SUPPORT_LS;
6083                         pp_state = PP_STATE_LS;
6084                 }
6085
6086                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6087                         pp_support_state |= PP_STATE_SUPPORT_CG;
6088                         pp_state |= PP_STATE_CG;
6089                 }
6090
6091                 if (state == AMD_CG_STATE_UNGATE)
6092                         pp_state = 0;
6093
6094                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6095                                 PP_BLOCK_GFX_MG,
6096                                 pp_support_state,
6097                                 pp_state);
6098                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6099         }
6100
6101         return 0;
6102 }
6103
6104 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6105                                           enum amd_clockgating_state state)
6106 {
6107
6108         uint32_t msg_id, pp_state = 0;
6109         uint32_t pp_support_state = 0;
6110         void *pp_handle = adev->powerplay.pp_handle;
6111
6112         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6113                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6114                         pp_support_state = PP_STATE_SUPPORT_LS;
6115                         pp_state = PP_STATE_LS;
6116                 }
6117                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6118                         pp_support_state |= PP_STATE_SUPPORT_CG;
6119                         pp_state |= PP_STATE_CG;
6120                 }
6121                 if (state == AMD_CG_STATE_UNGATE)
6122                         pp_state = 0;
6123
6124                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6125                                 PP_BLOCK_GFX_CG,
6126                                 pp_support_state,
6127                                 pp_state);
6128                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6129         }
6130
6131         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6132                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6133                         pp_support_state = PP_STATE_SUPPORT_LS;
6134                         pp_state = PP_STATE_LS;
6135                 }
6136                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6137                         pp_support_state |= PP_STATE_SUPPORT_CG;
6138                         pp_state |= PP_STATE_CG;
6139                 }
6140                 if (state == AMD_CG_STATE_UNGATE)
6141                         pp_state = 0;
6142
6143                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6144                                 PP_BLOCK_GFX_3D,
6145                                 pp_support_state,
6146                                 pp_state);
6147                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6148         }
6149
6150         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6151                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6152                         pp_support_state = PP_STATE_SUPPORT_LS;
6153                         pp_state = PP_STATE_LS;
6154                 }
6155
6156                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6157                         pp_support_state |= PP_STATE_SUPPORT_CG;
6158                         pp_state |= PP_STATE_CG;
6159                 }
6160
6161                 if (state == AMD_CG_STATE_UNGATE)
6162                         pp_state = 0;
6163
6164                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6165                                 PP_BLOCK_GFX_MG,
6166                                 pp_support_state,
6167                                 pp_state);
6168                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6169         }
6170
6171         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6172                 pp_support_state = PP_STATE_SUPPORT_LS;
6173
6174                 if (state == AMD_CG_STATE_UNGATE)
6175                         pp_state = 0;
6176                 else
6177                         pp_state = PP_STATE_LS;
6178
6179                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6180                                 PP_BLOCK_GFX_RLC,
6181                                 pp_support_state,
6182                                 pp_state);
6183                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6184         }
6185
6186         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6187                 pp_support_state = PP_STATE_SUPPORT_LS;
6188
6189                 if (state == AMD_CG_STATE_UNGATE)
6190                         pp_state = 0;
6191                 else
6192                         pp_state = PP_STATE_LS;
6193                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6194                         PP_BLOCK_GFX_CP,
6195                         pp_support_state,
6196                         pp_state);
6197                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6198         }
6199
6200         return 0;
6201 }
6202
6203 static int gfx_v8_0_set_clockgating_state(void *handle,
6204                                           enum amd_clockgating_state state)
6205 {
6206         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6207
6208         if (amdgpu_sriov_vf(adev))
6209                 return 0;
6210
6211         switch (adev->asic_type) {
6212         case CHIP_FIJI:
6213         case CHIP_CARRIZO:
6214         case CHIP_STONEY:
6215                 gfx_v8_0_update_gfx_clock_gating(adev,
6216                                                  state == AMD_CG_STATE_GATE);
6217                 break;
6218         case CHIP_TONGA:
6219                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6220                 break;
6221         case CHIP_POLARIS10:
6222         case CHIP_POLARIS11:
6223         case CHIP_POLARIS12:
6224                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6225                 break;
6226         default:
6227                 break;
6228         }
6229         return 0;
6230 }
6231
6232 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6233 {
6234         return ring->adev->wb.wb[ring->rptr_offs];
6235 }
6236
6237 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6238 {
6239         struct amdgpu_device *adev = ring->adev;
6240
6241         if (ring->use_doorbell)
6242                 /* XXX check if swapping is necessary on BE */
6243                 return ring->adev->wb.wb[ring->wptr_offs];
6244         else
6245                 return RREG32(mmCP_RB0_WPTR);
6246 }
6247
6248 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6249 {
6250         struct amdgpu_device *adev = ring->adev;
6251
6252         if (ring->use_doorbell) {
6253                 /* XXX check if swapping is necessary on BE */
6254                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6255                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6256         } else {
6257                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6258                 (void)RREG32(mmCP_RB0_WPTR);
6259         }
6260 }
6261
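/* Emit a WAIT_REG_MEM that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE; compute rings use the per-pipe CP2/CP6 done bits of
 * MEC1/MEC2, the gfx ring uses CP0 and runs the wait on the PFP.
 */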
6262 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6263 {
6264         u32 ref_and_mask, reg_mem_engine;
6265
6266         if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6267             (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6268                 switch (ring->me) {
6269                 case 1:
6270                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6271                         break;
6272                 case 2:
6273                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6274                         break;
6275                 default:
6276                         return;
6277                 }
6278                 reg_mem_engine = 0;
6279         } else {
6280                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6281                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6282         }
6283
6284         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6285         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6286                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6287                                  reg_mem_engine));
6288         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6289         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6290         amdgpu_ring_write(ring, ref_and_mask);
6291         amdgpu_ring_write(ring, ref_and_mask);
6292         amdgpu_ring_write(ring, 0x20); /* poll interval */
6293 }
6294
6295 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6296 {
6297         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6298         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6299                 EVENT_INDEX(4));
6300
6301         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6302         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6303                 EVENT_INDEX(0));
6304 }
6305
6306
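/* Invalidate the HDP read cache by writing 1 to HDP_DEBUG0 with a
 * confirmed WRITE_DATA packet.
 */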
6307 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6308 {
6309         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6310         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6311                                  WRITE_DATA_DST_SEL(0) |
6312                                  WR_CONFIRM));
6313         amdgpu_ring_write(ring, mmHDP_DEBUG0);
6314         amdgpu_ring_write(ring, 0);
6315         amdgpu_ring_write(ring, 1);
6316
6317 }
6318
6319 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6320                                       struct amdgpu_ib *ib,
6321                                       unsigned vm_id, bool ctx_switch)
6322 {
6323         u32 header, control = 0;
6324
6325         if (ib->flags & AMDGPU_IB_FLAG_CE)
6326                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6327         else
6328                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6329
6330         control |= ib->length_dw | (vm_id << 24);
6331
6332         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6333                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6334
6335                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6336                         gfx_v8_0_ring_emit_de_meta(ring);
6337         }
6338
6339         amdgpu_ring_write(ring, header);
6340         amdgpu_ring_write(ring,
6341 #ifdef __BIG_ENDIAN
6342                           (2 << 0) |
6343 #endif
6344                           (ib->gpu_addr & 0xFFFFFFFC));
6345         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6346         amdgpu_ring_write(ring, control);
6347 }
6348
6349 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6350                                           struct amdgpu_ib *ib,
6351                                           unsigned vm_id, bool ctx_switch)
6352 {
6353         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6354
6355         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6356         amdgpu_ring_write(ring,
6357 #ifdef __BIG_ENDIAN
6358                                 (2 << 0) |
6359 #endif
6360                                 (ib->gpu_addr & 0xFFFFFFFC));
6361         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6362         amdgpu_ring_write(ring, control);
6363 }
6364
6365 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6366                                          u64 seq, unsigned flags)
6367 {
6368         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6369         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6370
6371         /* EVENT_WRITE_EOP - flush caches, send int */
6372         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6373         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6374                                  EOP_TC_ACTION_EN |
6375                                  EOP_TC_WB_ACTION_EN |
6376                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6377                                  EVENT_INDEX(5)));
6378         amdgpu_ring_write(ring, addr & 0xfffffffc);
6379         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6380                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6381         amdgpu_ring_write(ring, lower_32_bits(seq));
6382         amdgpu_ring_write(ring, upper_32_bits(seq));
6383
6384 }
6385
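/* Stall the ring until the fence memory holds the last emitted sync_seq,
 * so packets after this point see all prior work (waits on the PFP for
 * gfx rings, on the ME for compute rings).
 */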
6386 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6387 {
6388         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6389         uint32_t seq = ring->fence_drv.sync_seq;
6390         uint64_t addr = ring->fence_drv.gpu_addr;
6391
6392         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6393         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6394                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6395                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6396         amdgpu_ring_write(ring, addr & 0xfffffffc);
6397         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6398         amdgpu_ring_write(ring, seq);
6399         amdgpu_ring_write(ring, 0xffffffff);
6400         amdgpu_ring_write(ring, 4); /* poll interval */
6401 }
6402
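/* Switch a VMID to a new page table: write its page directory address
 * (contexts 0-7 and 8-15 live in separate register banks), request a TLB
 * invalidate for the VMID and wait for it to complete.
 */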
6403 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6404                                         unsigned vm_id, uint64_t pd_addr)
6405 {
6406         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6407
6408         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6409         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6410                                  WRITE_DATA_DST_SEL(0)) |
6411                                  WR_CONFIRM);
6412         if (vm_id < 8) {
6413                 amdgpu_ring_write(ring,
6414                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6415         } else {
6416                 amdgpu_ring_write(ring,
6417                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6418         }
6419         amdgpu_ring_write(ring, 0);
6420         amdgpu_ring_write(ring, pd_addr >> 12);
6421
6422         /* bits 0-15 are the VM contexts0-15 */
6423         /* invalidate the cache */
6424         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6425         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6426                                  WRITE_DATA_DST_SEL(0)));
6427         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6428         amdgpu_ring_write(ring, 0);
6429         amdgpu_ring_write(ring, 1 << vm_id);
6430
6431         /* wait for the invalidate to complete */
6432         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6433         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6434                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6435                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6436         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6437         amdgpu_ring_write(ring, 0);
6438         amdgpu_ring_write(ring, 0); /* ref */
6439         amdgpu_ring_write(ring, 0); /* mask */
6440         amdgpu_ring_write(ring, 0x20); /* poll interval */
6441
6442         /* compute doesn't have PFP */
6443         if (usepfp) {
6444                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6445                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6446                 amdgpu_ring_write(ring, 0x0);
6447         }
6448 }
6449
6450 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6451 {
6452         return ring->adev->wb.wb[ring->wptr_offs];
6453 }
6454
6455 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6456 {
6457         struct amdgpu_device *adev = ring->adev;
6458
6459         /* XXX check if swapping is necessary on BE */
6460         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6461         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6462 }
6463
6464 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6465                                              u64 addr, u64 seq,
6466                                              unsigned flags)
6467 {
6468         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6469         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6470
6471         /* RELEASE_MEM - flush caches, send int */
6472         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6473         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6474                                  EOP_TC_ACTION_EN |
6475                                  EOP_TC_WB_ACTION_EN |
6476                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6477                                  EVENT_INDEX(5)));
6478         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6479         amdgpu_ring_write(ring, addr & 0xfffffffc);
6480         amdgpu_ring_write(ring, upper_32_bits(addr));
6481         amdgpu_ring_write(ring, lower_32_bits(seq));
6482         amdgpu_ring_write(ring, upper_32_bits(seq));
6483 }
6484
6485 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6486                                          u64 seq, unsigned int flags)
6487 {
6488         /* we only allocate 32bit for each seq wb address */
6489         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6490
6491         /* write fence seq to the "addr" */
6492         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6493         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6494                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6495         amdgpu_ring_write(ring, lower_32_bits(addr));
6496         amdgpu_ring_write(ring, upper_32_bits(addr));
6497         amdgpu_ring_write(ring, lower_32_bits(seq));
6498
6499         if (flags & AMDGPU_FENCE_FLAG_INT) {
6500                 /* set register to trigger INT */
6501                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6502                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6503                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6504                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6505                 amdgpu_ring_write(ring, 0);
6506                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6507         }
6508 }
6509
6510 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6511 {
6512         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6513         amdgpu_ring_write(ring, 0);
6514 }
6515
6516 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6517 {
6518         uint32_t dw2 = 0;
6519
6520         if (amdgpu_sriov_vf(ring->adev))
6521                 gfx_v8_0_ring_emit_ce_meta(ring);
6522
6523         dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
6524         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6525                 gfx_v8_0_ring_emit_vgt_flush(ring);
6526                 /* set load_global_config & load_global_uconfig */
6527                 dw2 |= 0x8001;
6528                 /* set load_cs_sh_regs */
6529                 dw2 |= 0x01000000;
6530                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6531                 dw2 |= 0x10002;
6532
6533                 /* set load_ce_ram if preamble presented */
6534                 /* set load_ce_ram if a preamble is presented */
6535                         dw2 |= 0x10000000;
6536         } else {
6537                 /* still load_ce_ram if this is the first time a preamble is
6538                  * presented, even though no context switch happens.
6539                  */
6540                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6541                         dw2 |= 0x10000000;
6542         }
6543
6544         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6545         amdgpu_ring_write(ring, dw2);
6546         amdgpu_ring_write(ring, 0);
6547 }
6548
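/* Emit a COND_EXEC that skips the following packets while the value at
 * cond_exe_gpu_addr is zero; the dword count is a placeholder patched in
 * later by gfx_v8_0_ring_emit_patch_cond_exec().
 */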
6549 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6550 {
6551         unsigned ret;
6552
6553         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6554         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6555         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6556         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
6557         ret = ring->wptr & ring->buf_mask;
6558         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6559         return ret;
6560 }
6561
6562 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6563 {
6564         unsigned cur;
6565
6566         BUG_ON(offset > ring->buf_mask);
6567         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6568
6569         cur = (ring->wptr & ring->buf_mask) - 1;
6570         if (likely(cur > offset))
6571                 ring->ring[offset] = cur - offset;
6572         else
6573                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6574 }
6575
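/* Emit a COPY_DATA that stores the value of 'reg' into the writeback slot
 * at adev->virt.reg_val_offs, used for register reads under SR-IOV.
 */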
6576 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6577 {
6578         struct amdgpu_device *adev = ring->adev;
6579
6580         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6581         amdgpu_ring_write(ring, 0 |     /* src: register*/
6582                                 (5 << 8) |      /* dst: memory */
6583                                 (1 << 20));     /* write confirm */
6584         amdgpu_ring_write(ring, reg);
6585         amdgpu_ring_write(ring, 0);
6586         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6587                                 adev->virt.reg_val_offs * 4));
6588         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6589                                 adev->virt.reg_val_offs * 4));
6590 }
6591
6592 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6593                                   uint32_t val)
6594 {
6595         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6596         amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
6597         amdgpu_ring_write(ring, reg);
6598         amdgpu_ring_write(ring, 0);
6599         amdgpu_ring_write(ring, val);
6600 }
6601
6602 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6603                                                  enum amdgpu_interrupt_state state)
6604 {
6605         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6606                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6607 }
6608
6609 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6610                                                      int me, int pipe,
6611                                                      enum amdgpu_interrupt_state state)
6612 {
6613         u32 mec_int_cntl, mec_int_cntl_reg;
6614
6615         /*
6616          * amdgpu controls only the first MEC. That's why this function only
6617          * handles the setting of interrupts for this specific MEC. All other
6618          * pipes' interrupts are set by amdkfd.
6619          */
6620
6621         if (me == 1) {
6622                 switch (pipe) {
6623                 case 0:
6624                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6625                         break;
6626                 case 1:
6627                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6628                         break;
6629                 case 2:
6630                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6631                         break;
6632                 case 3:
6633                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6634                         break;
6635                 default:
6636                         DRM_DEBUG("invalid pipe %d\n", pipe);
6637                         return;
6638                 }
6639         } else {
6640                 DRM_DEBUG("invalid me %d\n", me);
6641                 return;
6642         }
6643
6644         switch (state) {
6645         case AMDGPU_IRQ_STATE_DISABLE:
6646                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6647                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6648                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6649                 break;
6650         case AMDGPU_IRQ_STATE_ENABLE:
6651                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6652                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6653                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6654                 break;
6655         default:
6656                 break;
6657         }
6658 }
6659
6660 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6661                                              struct amdgpu_irq_src *source,
6662                                              unsigned type,
6663                                              enum amdgpu_interrupt_state state)
6664 {
6665         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6666                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6667
6668         return 0;
6669 }
6670
6671 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6672                                               struct amdgpu_irq_src *source,
6673                                               unsigned type,
6674                                               enum amdgpu_interrupt_state state)
6675 {
6676         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6677                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6678
6679         return 0;
6680 }
6681
6682 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6683                                             struct amdgpu_irq_src *src,
6684                                             unsigned type,
6685                                             enum amdgpu_interrupt_state state)
6686 {
6687         switch (type) {
6688         case AMDGPU_CP_IRQ_GFX_EOP:
6689                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6690                 break;
6691         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6692                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6693                 break;
6694         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6695                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6696                 break;
6697         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6698                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6699                 break;
6700         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6701                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6702                 break;
6703         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6704                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6705                 break;
6706         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6707                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6708                 break;
6709         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6710                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6711                 break;
6712         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6713                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6714                 break;
6715         default:
6716                 break;
6717         }
6718         return 0;
6719 }
6720
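/*
 * Decode the IH cookie into (me, pipe, queue) and forward fence
 * processing to the matching ring.  Per the shifts below, the ring_id
 * layout is: bits [1:0] pipe, bits [3:2] me, bits [6:4] queue.
 */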
6721 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6722                             struct amdgpu_irq_src *source,
6723                             struct amdgpu_iv_entry *entry)
6724 {
6725         int i;
6726         u8 me_id, pipe_id, queue_id;
6727         struct amdgpu_ring *ring;
6728
6729         DRM_DEBUG("IH: CP EOP\n");
6730         me_id = (entry->ring_id & 0x0c) >> 2;
6731         pipe_id = (entry->ring_id & 0x03) >> 0;
6732         queue_id = (entry->ring_id & 0x70) >> 4;
6733
6734         switch (me_id) {
6735         case 0:
6736                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6737                 break;
6738         case 1:
6739         case 2:
6740                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6741                         ring = &adev->gfx.compute_ring[i];
6742                         /* Per-queue interrupt is supported for MEC starting from VI.
6743                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6744                          */

6745                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6746                                 amdgpu_fence_process(ring);
6747                 }
6748                 break;
6749         }
6750         return 0;
6751 }
6752
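/*
 * Privileged register/instruction faults mean the command stream did
 * something it is not allowed to do; there is no inline recovery, so
 * the handlers below log the violation and kick the reset worker.
 */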
6753 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6754                                  struct amdgpu_irq_src *source,
6755                                  struct amdgpu_iv_entry *entry)
6756 {
6757         DRM_ERROR("Illegal register access in command stream\n");
6758         schedule_work(&adev->reset_work);
6759         return 0;
6760 }
6761
6762 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6763                                   struct amdgpu_irq_src *source,
6764                                   struct amdgpu_iv_entry *entry)
6765 {
6766         DRM_ERROR("Illegal instruction in command stream\n");
6767         schedule_work(&adev->reset_work);
6768         return 0;
6769 }
6770
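/*
 * The KIQ signals its fences through GENERIC2 interrupts.  Arming them
 * takes two writes: the global enable in CPC_INT_CNTL plus the
 * per-pipe enable in the INT_CNTL register of whichever ME/pipe the
 * KIQ ring was placed on (ME1 or ME2).
 */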
6771 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6772                                             struct amdgpu_irq_src *src,
6773                                             unsigned int type,
6774                                             enum amdgpu_interrupt_state state)
6775 {
6776         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6777
6778         switch (type) {
6779         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6780                 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6781                              state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6782                 if (ring->me == 1)
6783                         WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6784                                      ring->pipe,
6785                                      GENERIC2_INT_ENABLE,
6786                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6787                 else
6788                         WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6789                                      ring->pipe,
6790                                      GENERIC2_INT_ENABLE,
6791                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6792                 break;
6793         default:
6794                 BUG(); /* KIQ only supports GENERIC2_INT for now */
6795                 break;
6796         }
6797         return 0;
6798 }
6799
6800 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6801                             struct amdgpu_irq_src *source,
6802                             struct amdgpu_iv_entry *entry)
6803 {
6804         u8 me_id, pipe_id, queue_id;
6805         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6806
6807         me_id = (entry->ring_id & 0x0c) >> 2;
6808         pipe_id = (entry->ring_id & 0x03) >> 0;
6809         queue_id = (entry->ring_id & 0x70) >> 4;
6810         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6811                    me_id, pipe_id, queue_id);
6812
6813         amdgpu_fence_process(ring);
6814         return 0;
6815 }
6816
6817 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6818         .name = "gfx_v8_0",
6819         .early_init = gfx_v8_0_early_init,
6820         .late_init = gfx_v8_0_late_init,
6821         .sw_init = gfx_v8_0_sw_init,
6822         .sw_fini = gfx_v8_0_sw_fini,
6823         .hw_init = gfx_v8_0_hw_init,
6824         .hw_fini = gfx_v8_0_hw_fini,
6825         .suspend = gfx_v8_0_suspend,
6826         .resume = gfx_v8_0_resume,
6827         .is_idle = gfx_v8_0_is_idle,
6828         .wait_for_idle = gfx_v8_0_wait_for_idle,
6829         .check_soft_reset = gfx_v8_0_check_soft_reset,
6830         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6831         .soft_reset = gfx_v8_0_soft_reset,
6832         .post_soft_reset = gfx_v8_0_post_soft_reset,
6833         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6834         .set_powergating_state = gfx_v8_0_set_powergating_state,
6835         .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6836 };
6837
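/*
 * A note on emit_frame_size: it is the worst-case number of ring
 * dwords one submission can need on top of its IBs; the ring core
 * reserves emit_frame_size + num_ibs * emit_ib_size dwords before
 * emitting a frame.  The per-item counts in the gfx table below sum
 * to 150 dw, so 16 IBs at 4 dw each gives 214, consistent with the
 * "maximum 215 dw" note in the table header.
 */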
6838 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6839         .type = AMDGPU_RING_TYPE_GFX,
6840         .align_mask = 0xff,
6841         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6842         .support_64bit_ptrs = false,
6843         .get_rptr = gfx_v8_0_ring_get_rptr,
6844         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6845         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6846         .emit_frame_size = /* maximum 215 dw if counting 16 IBs */
6847                 5 +  /* COND_EXEC */
6848                 7 +  /* PIPELINE_SYNC */
6849                 19 + /* VM_FLUSH */
6850                 8 +  /* FENCE for VM_FLUSH */
6851                 20 + /* GDS switch */
6852                 4 +  /* double SWITCH_BUFFER,
6853                         the first COND_EXEC jumps to the place
6854                         just prior to this double SWITCH_BUFFER */
6855                 5 +  /* COND_EXEC */
6856                 7 +  /* HDP_flush */
6857                 4 +  /* VGT_flush */
6858                 14 + /* CE_META */
6859                 31 + /* DE_META */
6860                 3 +  /* CNTX_CTRL */
6861                 5 +  /* HDP_INVL */
6862                 8 + 8 + /* FENCE x2 */
6863                 2, /* SWITCH_BUFFER */
6864         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6865         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6866         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6867         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6868         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6869         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6870         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6871         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6872         .test_ring = gfx_v8_0_ring_test_ring,
6873         .test_ib = gfx_v8_0_ring_test_ib,
6874         .insert_nop = amdgpu_ring_insert_nop,
6875         .pad_ib = amdgpu_ring_generic_pad_ib,
6876         .emit_switch_buffer = gfx_v8_ring_emit_sb,
6877         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6878         .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6879         .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6880 };
6881
6882 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6883         .type = AMDGPU_RING_TYPE_COMPUTE,
6884         .align_mask = 0xff,
6885         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6886         .support_64bit_ptrs = false,
6887         .get_rptr = gfx_v8_0_ring_get_rptr,
6888         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6889         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6890         .emit_frame_size =
6891                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6892                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6893                 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6894                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6895                 17 + /* gfx_v8_0_ring_emit_vm_flush */
6896                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6897         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6898         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6899         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6900         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6901         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6902         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6903         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6904         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6905         .test_ring = gfx_v8_0_ring_test_ring,
6906         .test_ib = gfx_v8_0_ring_test_ib,
6907         .insert_nop = amdgpu_ring_insert_nop,
6908         .pad_ib = amdgpu_ring_generic_pad_ib,
6909 };
6910
6911 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6912         .type = AMDGPU_RING_TYPE_KIQ,
6913         .align_mask = 0xff,
6914         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6915         .support_64bit_ptrs = false,
6916         .get_rptr = gfx_v8_0_ring_get_rptr,
6917         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6918         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6919         .emit_frame_size =
6920                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6921                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6922                 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6923                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6924                 17 + /* gfx_v8_0_ring_emit_vm_flush */
6925                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6926         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6927         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6928         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6929         .test_ring = gfx_v8_0_ring_test_ring,
6930         .test_ib = gfx_v8_0_ring_test_ib,
6931         .insert_nop = amdgpu_ring_insert_nop,
6932         .pad_ib = amdgpu_ring_generic_pad_ib,
6933         .emit_rreg = gfx_v8_0_ring_emit_rreg,
6934         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6935 };
6936
6937 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6938 {
6939         int i;
6940
6941         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6942
6943         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6944                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6945
6946         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6947                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6948 }
6949
6950 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6951         .set = gfx_v8_0_set_eop_interrupt_state,
6952         .process = gfx_v8_0_eop_irq,
6953 };
6954
6955 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6956         .set = gfx_v8_0_set_priv_reg_fault_state,
6957         .process = gfx_v8_0_priv_reg_irq,
6958 };
6959
6960 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6961         .set = gfx_v8_0_set_priv_inst_fault_state,
6962         .process = gfx_v8_0_priv_inst_irq,
6963 };
6964
6965 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
6966         .set = gfx_v8_0_kiq_set_interrupt_state,
6967         .process = gfx_v8_0_kiq_irq,
6968 };
6969
6970 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6971 {
6972         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6973         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6974
6975         adev->gfx.priv_reg_irq.num_types = 1;
6976         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6977
6978         adev->gfx.priv_inst_irq.num_types = 1;
6979         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6980
6981         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
6982         adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
6983 }
6984
6985 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6986 {
6987         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6988 }
6989
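/*
 * Partition the on-chip GDS resources between gfx and compute.  The
 * GDS memory size is read back from GDS_VMID0_SIZE; 64 KB parts get
 * 4 KB memory partitions, everything else uses the 1 KB layout.  GWS
 * and OA totals are fixed at 64 and 16 entries respectively.
 */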
6990 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6991 {
6992         /* init asic gds info */
6993         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6994         adev->gds.gws.total_size = 64;
6995         adev->gds.oa.total_size = 16;
6996
6997         if (adev->gds.mem.total_size == 64 * 1024) {
6998                 adev->gds.mem.gfx_partition_size = 4096;
6999                 adev->gds.mem.cs_partition_size = 4096;
7000
7001                 adev->gds.gws.gfx_partition_size = 4;
7002                 adev->gds.gws.cs_partition_size = 4;
7003
7004                 adev->gds.oa.gfx_partition_size = 4;
7005                 adev->gds.oa.cs_partition_size = 1;
7006         } else {
7007                 adev->gds.mem.gfx_partition_size = 1024;
7008                 adev->gds.mem.cs_partition_size = 1024;
7009
7010                 adev->gds.gws.gfx_partition_size = 16;
7011                 adev->gds.gws.cs_partition_size = 16;
7012
7013                 adev->gds.oa.gfx_partition_size = 4;
7014                 adev->gds.oa.cs_partition_size = 4;
7015         }
7016 }
7017
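/*
 * Program a user-requested CU disable mask for the currently selected
 * SE/SH into the INACTIVE_CUS field of GC_USER_SHADER_ARRAY_CONFIG.
 * A zero bitmap leaves the register untouched.
 */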
7018 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7019                                                  u32 bitmap)
7020 {
7021         u32 data;
7022
7023         if (!bitmap)
7024                 return;
7025
7026         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7027         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7028
7029         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7030 }
7031
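/*
 * An active CU is one disabled neither by fuses
 * (CC_GC_SHADER_ARRAY_CONFIG) nor by the user override
 * (GC_USER_SHADER_ARRAY_CONFIG): OR the two inactive masks, invert,
 * and clamp the result to the max_cu_per_sh bit width.
 */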
7032 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7033 {
7034         u32 data, mask;
7035
7036         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7037                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7038
7039         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7040
7041         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7042 }
7043
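/*
 * Build the per-SE/SH CU bitmaps plus the aggregate always-on (AO) CU
 * mask.  Under grbm_idx_mutex, each SE/SH is selected in turn, the
 * user disable mask is applied where one exists, and up to ao_cu_num
 * CUs per SH (2 on APUs, all active CUs on dGPUs) are marked
 * always-on.
 */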
7044 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7045 {
7046         int i, j, k, counter, active_cu_number = 0;
7047         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7048         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7049         unsigned disable_masks[4 * 2];
7050         u32 ao_cu_num;
7051
7052         memset(cu_info, 0, sizeof(*cu_info));
7053
7054         if (adev->flags & AMD_IS_APU)
7055                 ao_cu_num = 2;
7056         else
7057                 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7058
7059         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7060
7061         mutex_lock(&adev->grbm_idx_mutex);
7062         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7063                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7064                         mask = 1;
7065                         ao_bitmap = 0;
7066                         counter = 0;
7067                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7068                         if (i < 4 && j < 2)
7069                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7070                                         adev, disable_masks[i * 2 + j]);
7071                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7072                         cu_info->bitmap[i][j] = bitmap;
7073
7074                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7075                                 if (bitmap & mask) {
7076                                         if (counter < ao_cu_num)
7077                                                 ao_bitmap |= mask;
7078                                         counter++;
7079                                 }
7080                                 mask <<= 1;
7081                         }
7082                         active_cu_number += counter;
7083                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7084                 }
7085         }
7086         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7087         mutex_unlock(&adev->grbm_idx_mutex);
7088
7089         cu_info->number = active_cu_number;
7090         cu_info->ao_cu_mask = ao_cu_mask;
7091 }
7092
7093 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7094 {
7095         .type = AMD_IP_BLOCK_TYPE_GFX,
7096         .major = 8,
7097         .minor = 0,
7098         .rev = 0,
7099         .funcs = &gfx_v8_0_ip_funcs,
7100 };
7101
7102 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7103 {
7104         .type = AMD_IP_BLOCK_TYPE_GFX,
7105         .major = 8,
7106         .minor = 1,
7107         .rev = 0,
7108         .funcs = &gfx_v8_0_ip_funcs,
7109 };
7110
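/*
 * CE/DE metadata emission, used on the SR-IOV world-switch/preemption
 * path.  Both helpers write into the context save area (CSA), placed
 * at a fixed offset just below AMDGPU_VA_RESERVED_SIZE
 * (csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096), with the GDS
 * backup occupying the page directly after the CSA page.  The
 * chained-IB struct layouts are used when the host exposes chained IB
 * support (adev->virt.chained_ib_support).
 */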
7111 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7112 {
7113         uint64_t ce_payload_addr;
7114         int cnt_ce;
7115         static union {
7116                 struct vi_ce_ib_state regular;
7117                 struct vi_ce_ib_state_chained_ib chained;
7118         } ce_payload = {};
7119
7120         if (ring->adev->virt.chained_ib_support) {
7121                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7122                                                   offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7123                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7124         } else {
7125                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7126                                                   offsetof(struct vi_gfx_meta_data, ce_payload);
7127                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7128         }
7129
7130         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7131         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7132                                 WRITE_DATA_DST_SEL(8) |
7133                                 WR_CONFIRM) |
7134                                 WRITE_DATA_CACHE_POLICY(0));
7135         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7136         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7137         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7138 }
7139
7140 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7141 {
7142         uint64_t de_payload_addr, gds_addr, csa_addr;
7143         int cnt_de;
7144         static union {
7145                 struct vi_de_ib_state regular;
7146                 struct vi_de_ib_state_chained_ib chained;
7147         } de_payload = {};
7148
7149         csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
7150         gds_addr = csa_addr + 4096;
7151         if (ring->adev->virt.chained_ib_support) {
7152                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7153                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7154                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7155                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7156         } else {
7157                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7158                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7159                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7160                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7161         }
7162
7163         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7164         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7165                                 WRITE_DATA_DST_SEL(8) |
7166                                 WR_CONFIRM) |
7167                                 WRITE_DATA_CACHE_POLICY(0));
7168         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7169         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7170         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7171 }