/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include <drm/drmP.h>
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
#define GFX8_NUM_GFX_RINGS     1
/* Bytes of MEC HQD persistent state (EOP buffer) reserved per compute pipe */
#define GFX8_MEC_HPD_SIZE 2048

/* Per-ASIC golden values for the GB_ADDR_CONFIG register (tiling/memory layout) */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Field builders for GB_TILE_MODEn / GB_MACROTILE_MODEn register values */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-block clock-gating override bits in mmRLC_CGTT_MGCG_OVERRIDE
 * (defined locally rather than in the generated register headers)
 */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77
/* BPM SERDES CMD: set (1) / clear (0) commands used by the BPM serdes writes */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX            /* sentinel: number of BPM register slots */
};

/* Number of entries in the RLC "direct register list" firmware format */
#define RLC_FormatDirectRegListLength        14
93
/*
 * Declare the GFX firmware images each supported VI ASIC may request, so
 * userspace tooling (e.g. initramfs generators) knows to ship them.
 * Note: stoney and topaz have no mec2 image.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
147
/* Per-VMID GDS register offsets: {BASE, SIZE, GWS, OA} for VMIDs 0-15 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
167
/*
 * Golden register settings for Tonga A11.
 * Flat list of {register offset, mask, value} triplets consumed by
 * amdgpu_program_register_sequence(); values are hardware-validated —
 * do not edit by hand.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
187
/* Common golden {reg, mask, value} triplets for all Tonga variants */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
199
/*
 * Tonga MGCG/CGCG (medium-grain / coarse-grain clock gating) init sequence.
 * {reg, mask, value} triplets; the mmGRBM_GFX_INDEX writes broadcast the
 * following per-CU CGTS programming to all SEs/SHs/CUs.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
278
/* Golden {reg, mask, value} triplets for Polaris11 A11 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
299
/* Common golden {reg, mask, value} triplets for all Polaris11 variants */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
309
/* Golden {reg, mask, value} triplets for Polaris10 A11 */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
330
/* Common golden {reg, mask, value} triplets for all Polaris10 variants */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
342
/* Common golden {reg, mask, value} triplets for all Fiji variants */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
356
/* Golden {reg, mask, value} triplets for Fiji A10 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
371
/*
 * Fiji MGCG/CGCG clock-gating init sequence ({reg, mask, value} triplets).
 * Unlike Tonga/Iceland/Carrizo, Fiji needs no per-CU CGTS programming here.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
410
/* Golden {reg, mask, value} triplets for Iceland/Topaz A11 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
430
/* Common golden {reg, mask, value} triplets for all Iceland/Topaz variants */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
442
/*
 * Iceland/Topaz MGCG/CGCG clock-gating init sequence ({reg, mask, value}
 * triplets). Covers CU0-CU5 only, and omits the trailing CP_MEM_SLP_CNTL
 * entry present on the other ASICs' tables.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
510
/* Golden {reg, mask, value} triplets for Carrizo A11 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
526
/* Common golden {reg, mask, value} triplets for all Carrizo variants */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
538
/*
 * Carrizo MGCG/CGCG clock-gating init sequence ({reg, mask, value} triplets).
 * Largely mirrors the Tonga table; note the final RLC_CGCG_CGLS_CTRL value
 * (0x0020003f vs Tonga's 0x0020003c).
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
617
/* Golden {reg, mask, value} triplets for Stoney A11 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
631
/*
 * Stoney common golden settings ({register, mask, value} triples):
 * raster configuration, GB address config and SPI CU resource
 * reservations applied for all Stoney variants.
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
643
/*
 * Stoney medium-grain / coarse-grain clock-gating init values
 * ({register, mask, value} triples): RLC CGCG/CGLS control, CP/RLC
 * memory light-sleep and CGTS SM control defaults.
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
652
/* Forward declarations for helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
661
662 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
663 {
664         switch (adev->asic_type) {
665         case CHIP_TOPAZ:
666                 amdgpu_program_register_sequence(adev,
667                                                  iceland_mgcg_cgcg_init,
668                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
669                 amdgpu_program_register_sequence(adev,
670                                                  golden_settings_iceland_a11,
671                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
672                 amdgpu_program_register_sequence(adev,
673                                                  iceland_golden_common_all,
674                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
675                 break;
676         case CHIP_FIJI:
677                 amdgpu_program_register_sequence(adev,
678                                                  fiji_mgcg_cgcg_init,
679                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
680                 amdgpu_program_register_sequence(adev,
681                                                  golden_settings_fiji_a10,
682                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
683                 amdgpu_program_register_sequence(adev,
684                                                  fiji_golden_common_all,
685                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
686                 break;
687
688         case CHIP_TONGA:
689                 amdgpu_program_register_sequence(adev,
690                                                  tonga_mgcg_cgcg_init,
691                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
692                 amdgpu_program_register_sequence(adev,
693                                                  golden_settings_tonga_a11,
694                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
695                 amdgpu_program_register_sequence(adev,
696                                                  tonga_golden_common_all,
697                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
698                 break;
699         case CHIP_POLARIS11:
700         case CHIP_POLARIS12:
701                 amdgpu_program_register_sequence(adev,
702                                                  golden_settings_polaris11_a11,
703                                                  (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
704                 amdgpu_program_register_sequence(adev,
705                                                  polaris11_golden_common_all,
706                                                  (const u32)ARRAY_SIZE(polaris11_golden_common_all));
707                 break;
708         case CHIP_POLARIS10:
709                 amdgpu_program_register_sequence(adev,
710                                                  golden_settings_polaris10_a11,
711                                                  (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
712                 amdgpu_program_register_sequence(adev,
713                                                  polaris10_golden_common_all,
714                                                  (const u32)ARRAY_SIZE(polaris10_golden_common_all));
715                 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
716                 if (adev->pdev->revision == 0xc7 &&
717                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
718                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
719                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
720                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
721                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
722                 }
723                 break;
724         case CHIP_CARRIZO:
725                 amdgpu_program_register_sequence(adev,
726                                                  cz_mgcg_cgcg_init,
727                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
728                 amdgpu_program_register_sequence(adev,
729                                                  cz_golden_settings_a11,
730                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
731                 amdgpu_program_register_sequence(adev,
732                                                  cz_golden_common_all,
733                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
734                 break;
735         case CHIP_STONEY:
736                 amdgpu_program_register_sequence(adev,
737                                                  stoney_mgcg_cgcg_init,
738                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
739                 amdgpu_program_register_sequence(adev,
740                                                  stoney_golden_settings_a11,
741                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
742                 amdgpu_program_register_sequence(adev,
743                                                  stoney_golden_common_all,
744                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
745                 break;
746         default:
747                 break;
748         }
749 }
750
751 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
752 {
753         adev->gfx.scratch.num_reg = 8;
754         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
755         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
756 }
757
758 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
759 {
760         struct amdgpu_device *adev = ring->adev;
761         uint32_t scratch;
762         uint32_t tmp = 0;
763         unsigned i;
764         int r;
765
766         r = amdgpu_gfx_scratch_get(adev, &scratch);
767         if (r) {
768                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
769                 return r;
770         }
771         WREG32(scratch, 0xCAFEDEAD);
772         r = amdgpu_ring_alloc(ring, 3);
773         if (r) {
774                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
775                           ring->idx, r);
776                 amdgpu_gfx_scratch_free(adev, scratch);
777                 return r;
778         }
779         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
780         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
781         amdgpu_ring_write(ring, 0xDEADBEEF);
782         amdgpu_ring_commit(ring);
783
784         for (i = 0; i < adev->usec_timeout; i++) {
785                 tmp = RREG32(scratch);
786                 if (tmp == 0xDEADBEEF)
787                         break;
788                 DRM_UDELAY(1);
789         }
790         if (i < adev->usec_timeout) {
791                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
792                          ring->idx, i);
793         } else {
794                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
795                           ring->idx, scratch, tmp);
796                 r = -EINVAL;
797         }
798         amdgpu_gfx_scratch_free(adev, scratch);
799         return r;
800 }
801
/*
 * gfx_v8_0_ring_test_ib - indirect buffer (IB) submission test
 *
 * Builds a 3-dword IB that writes 0xDEADBEEF to a scratch register,
 * schedules it on @ring, waits up to @timeout jiffies for its fence and
 * verifies the scratch value.  Returns 0 on success, -ETIMEDOUT if the
 * fence never signals, -EINVAL if the value is wrong, or a negative
 * error from allocation/scheduling.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* sentinel value; the IB must overwrite it with 0xDEADBEEF */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* SET_UCONFIG_REG packet: scratch <- 0xDEADBEEF */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	/* 0 = timed out, <0 = wait error, >0 = fence signaled */
	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
/* cleanup in reverse acquisition order; fence put drops our reference */
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
857
858
859 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
860 {
861         release_firmware(adev->gfx.pfp_fw);
862         adev->gfx.pfp_fw = NULL;
863         release_firmware(adev->gfx.me_fw);
864         adev->gfx.me_fw = NULL;
865         release_firmware(adev->gfx.ce_fw);
866         adev->gfx.ce_fw = NULL;
867         release_firmware(adev->gfx.rlc_fw);
868         adev->gfx.rlc_fw = NULL;
869         release_firmware(adev->gfx.mec_fw);
870         adev->gfx.mec_fw = NULL;
871         if ((adev->asic_type != CHIP_STONEY) &&
872             (adev->asic_type != CHIP_TOPAZ))
873                 release_firmware(adev->gfx.mec2_fw);
874         adev->gfx.mec2_fw = NULL;
875
876         kfree(adev->gfx.rlc.register_list_format);
877 }
878
879 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
880 {
881         const char *chip_name;
882         char fw_name[30];
883         int err;
884         struct amdgpu_firmware_info *info = NULL;
885         const struct common_firmware_header *header = NULL;
886         const struct gfx_firmware_header_v1_0 *cp_hdr;
887         const struct rlc_firmware_header_v2_0 *rlc_hdr;
888         unsigned int *tmp = NULL, i;
889
890         DRM_DEBUG("\n");
891
892         switch (adev->asic_type) {
893         case CHIP_TOPAZ:
894                 chip_name = "topaz";
895                 break;
896         case CHIP_TONGA:
897                 chip_name = "tonga";
898                 break;
899         case CHIP_CARRIZO:
900                 chip_name = "carrizo";
901                 break;
902         case CHIP_FIJI:
903                 chip_name = "fiji";
904                 break;
905         case CHIP_POLARIS11:
906                 chip_name = "polaris11";
907                 break;
908         case CHIP_POLARIS10:
909                 chip_name = "polaris10";
910                 break;
911         case CHIP_POLARIS12:
912                 chip_name = "polaris12";
913                 break;
914         case CHIP_STONEY:
915                 chip_name = "stoney";
916                 break;
917         default:
918                 BUG();
919         }
920
921         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
922         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
923         if (err)
924                 goto out;
925         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
926         if (err)
927                 goto out;
928         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
929         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
930         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
931
932         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
933         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
934         if (err)
935                 goto out;
936         err = amdgpu_ucode_validate(adev->gfx.me_fw);
937         if (err)
938                 goto out;
939         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
940         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
941
942         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
943
944         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
945         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
946         if (err)
947                 goto out;
948         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
949         if (err)
950                 goto out;
951         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
952         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
953         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
954
955         /*
956          * Support for MCBP/Virtualization in combination with chained IBs is
957          * formal released on feature version #46
958          */
959         if (adev->gfx.ce_feature_version >= 46 &&
960             adev->gfx.pfp_feature_version >= 46) {
961                 adev->virt.chained_ib_support = true;
962                 DRM_INFO("Chained IB support enabled!\n");
963         } else
964                 adev->virt.chained_ib_support = false;
965
966         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
967         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
968         if (err)
969                 goto out;
970         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
971         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
972         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
973         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
974
975         adev->gfx.rlc.save_and_restore_offset =
976                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
977         adev->gfx.rlc.clear_state_descriptor_offset =
978                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
979         adev->gfx.rlc.avail_scratch_ram_locations =
980                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
981         adev->gfx.rlc.reg_restore_list_size =
982                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
983         adev->gfx.rlc.reg_list_format_start =
984                         le32_to_cpu(rlc_hdr->reg_list_format_start);
985         adev->gfx.rlc.reg_list_format_separate_start =
986                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
987         adev->gfx.rlc.starting_offsets_start =
988                         le32_to_cpu(rlc_hdr->starting_offsets_start);
989         adev->gfx.rlc.reg_list_format_size_bytes =
990                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
991         adev->gfx.rlc.reg_list_size_bytes =
992                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
993
994         adev->gfx.rlc.register_list_format =
995                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
996                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
997
998         if (!adev->gfx.rlc.register_list_format) {
999                 err = -ENOMEM;
1000                 goto out;
1001         }
1002
1003         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1004                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1005         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1006                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1007
1008         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1009
1010         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1011                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1012         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1013                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1014
1015         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1016         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1017         if (err)
1018                 goto out;
1019         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1020         if (err)
1021                 goto out;
1022         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1023         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1024         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1025
1026         if ((adev->asic_type != CHIP_STONEY) &&
1027             (adev->asic_type != CHIP_TOPAZ)) {
1028                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1029                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1030                 if (!err) {
1031                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1032                         if (err)
1033                                 goto out;
1034                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1035                                 adev->gfx.mec2_fw->data;
1036                         adev->gfx.mec2_fw_version =
1037                                 le32_to_cpu(cp_hdr->header.ucode_version);
1038                         adev->gfx.mec2_feature_version =
1039                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1040                 } else {
1041                         err = 0;
1042                         adev->gfx.mec2_fw = NULL;
1043                 }
1044         }
1045
1046         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1047                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1048                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1049                 info->fw = adev->gfx.pfp_fw;
1050                 header = (const struct common_firmware_header *)info->fw->data;
1051                 adev->firmware.fw_size +=
1052                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1053
1054                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1055                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1056                 info->fw = adev->gfx.me_fw;
1057                 header = (const struct common_firmware_header *)info->fw->data;
1058                 adev->firmware.fw_size +=
1059                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1060
1061                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1062                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1063                 info->fw = adev->gfx.ce_fw;
1064                 header = (const struct common_firmware_header *)info->fw->data;
1065                 adev->firmware.fw_size +=
1066                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1067
1068                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1069                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1070                 info->fw = adev->gfx.rlc_fw;
1071                 header = (const struct common_firmware_header *)info->fw->data;
1072                 adev->firmware.fw_size +=
1073                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1074
1075                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1076                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1077                 info->fw = adev->gfx.mec_fw;
1078                 header = (const struct common_firmware_header *)info->fw->data;
1079                 adev->firmware.fw_size +=
1080                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1081
1082                 /* we need account JT in */
1083                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1084                 adev->firmware.fw_size +=
1085                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1086
1087                 if (amdgpu_sriov_vf(adev)) {
1088                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1089                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1090                         info->fw = adev->gfx.mec_fw;
1091                         adev->firmware.fw_size +=
1092                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1093                 }
1094
1095                 if (adev->gfx.mec2_fw) {
1096                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1097                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1098                         info->fw = adev->gfx.mec2_fw;
1099                         header = (const struct common_firmware_header *)info->fw->data;
1100                         adev->firmware.fw_size +=
1101                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1102                 }
1103
1104         }
1105
1106 out:
1107         if (err) {
1108                 dev_err(adev->dev,
1109                         "gfx8: Failed to load firmware \"%s\"\n",
1110                         fw_name);
1111                 release_firmware(adev->gfx.pfp_fw);
1112                 adev->gfx.pfp_fw = NULL;
1113                 release_firmware(adev->gfx.me_fw);
1114                 adev->gfx.me_fw = NULL;
1115                 release_firmware(adev->gfx.ce_fw);
1116                 adev->gfx.ce_fw = NULL;
1117                 release_firmware(adev->gfx.rlc_fw);
1118                 adev->gfx.rlc_fw = NULL;
1119                 release_firmware(adev->gfx.mec_fw);
1120                 adev->gfx.mec_fw = NULL;
1121                 release_firmware(adev->gfx.mec2_fw);
1122                 adev->gfx.mec2_fw = NULL;
1123         }
1124         return err;
1125 }
1126
/*
 * gfx_v8_0_get_csb_buffer - build the RLC clear-state buffer (CSB)
 *
 * Fills @buffer with a little-endian PM4 stream: PREAMBLE begin,
 * CONTEXT_CONTROL, the SECT_CONTEXT register extents from
 * adev->gfx.rlc.cs_data, the pipe-0 raster configuration, PREAMBLE end
 * and a final CLEAR_STATE.  The buffer must be at least
 * gfx_v8_0_get_csb_size(adev) dwords; packet order matters to the RLC.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* emit each context-register extent as one SET_CONTEXT_REG packet */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT belongs in the CSB; bail out */
				return;
			}
		}
	}

	/* raster config for SE0/SH0 */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1173
1174 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1175 {
1176         const __le32 *fw_data;
1177         volatile u32 *dst_ptr;
1178         int me, i, max_me = 4;
1179         u32 bo_offset = 0;
1180         u32 table_offset, table_size;
1181
1182         if (adev->asic_type == CHIP_CARRIZO)
1183                 max_me = 5;
1184
1185         /* write the cp table buffer */
1186         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1187         for (me = 0; me < max_me; me++) {
1188                 if (me == 0) {
1189                         const struct gfx_firmware_header_v1_0 *hdr =
1190                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1191                         fw_data = (const __le32 *)
1192                                 (adev->gfx.ce_fw->data +
1193                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1194                         table_offset = le32_to_cpu(hdr->jt_offset);
1195                         table_size = le32_to_cpu(hdr->jt_size);
1196                 } else if (me == 1) {
1197                         const struct gfx_firmware_header_v1_0 *hdr =
1198                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1199                         fw_data = (const __le32 *)
1200                                 (adev->gfx.pfp_fw->data +
1201                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1202                         table_offset = le32_to_cpu(hdr->jt_offset);
1203                         table_size = le32_to_cpu(hdr->jt_size);
1204                 } else if (me == 2) {
1205                         const struct gfx_firmware_header_v1_0 *hdr =
1206                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1207                         fw_data = (const __le32 *)
1208                                 (adev->gfx.me_fw->data +
1209                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1210                         table_offset = le32_to_cpu(hdr->jt_offset);
1211                         table_size = le32_to_cpu(hdr->jt_size);
1212                 } else if (me == 3) {
1213                         const struct gfx_firmware_header_v1_0 *hdr =
1214                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1215                         fw_data = (const __le32 *)
1216                                 (adev->gfx.mec_fw->data +
1217                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1218                         table_offset = le32_to_cpu(hdr->jt_offset);
1219                         table_size = le32_to_cpu(hdr->jt_size);
1220                 } else  if (me == 4) {
1221                         const struct gfx_firmware_header_v1_0 *hdr =
1222                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1223                         fw_data = (const __le32 *)
1224                                 (adev->gfx.mec2_fw->data +
1225                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1226                         table_offset = le32_to_cpu(hdr->jt_offset);
1227                         table_size = le32_to_cpu(hdr->jt_size);
1228                 }
1229
1230                 for (i = 0; i < table_size; i ++) {
1231                         dst_ptr[bo_offset + i] =
1232                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1233                 }
1234
1235                 bo_offset += table_size;
1236         }
1237 }
1238
/*
 * gfx_v8_0_rlc_fini - free the RLC clear-state and CP jump-table BOs
 *
 * amdgpu_bo_free_kernel() handles NULL objects, so this is safe to call
 * even if gfx_v8_0_rlc_init() failed part-way through.
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1244
/*
 * gfx_v8_0_rlc_init - allocate and fill RLC support buffers
 *
 * Creates a VRAM BO for the clear-state buffer (sized by
 * gfx_v8_0_get_csb_size()) and writes the CSB into it, and — on
 * Carrizo/Stoney only — creates the CP jump-table BO and populates it
 * via cz_init_cp_jump_table().  Each BO is created reserved+mapped,
 * then unmapped and unreserved once filled.
 *
 * Returns 0 on success or a negative errno from BO creation.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
1299
/* Free the MEC HPD EOP buffer object allocated by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1304
/* Allocate the MEC HPD EOP buffer.
 *
 * Claims the compute queues this driver will own, then creates a single
 * GTT buffer object holding GFX8_MEC_HPD_SIZE bytes per compute ring and
 * zeroes it.  Returns 0 on success or a negative error code from the BO
 * creation.
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		/* NOTE(review): "HDP" looks like a typo for "HPD" -- runtime
		 * string, left byte-identical. */
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	/* BO was created reserved and mapped; drop the map and reservation */
	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
1335
/* Compute shader used by gfx_v8_0_do_edc_gpr_workarounds() to touch the
 * VGPR file.  Raw GCN3 machine-code words; presumably a run of v_mov_b32
 * writes terminated by s_barrier/s_endpgm -- confirm against the GCN3
 * ISA manual before modifying.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1372
/* Compute shader used by gfx_v8_0_do_edc_gpr_workarounds() to touch the
 * SGPR file (dispatched twice, once per SGPR1/SGPR2 register set).  Raw
 * GCN3 machine-code words -- confirm against the GCN3 ISA manual before
 * modifying.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1397
/* Register/value pairs programmed via PACKET3_SET_SH_REG before the VGPR
 * workaround dispatch in gfx_v8_0_do_edc_gpr_workarounds() (consumed two
 * entries at a time: register offset, then value).
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1417
/* Register/value pairs for the first SGPR workaround dispatch.  Differs
 * from sgpr2_init_regs only in the STATIC_THREAD_MGMT_SE0 mask (0x0f
 * here vs 0xf0), i.e. which CUs the dispatch is steered to.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1437
/* Register/value pairs for the second SGPR workaround dispatch (CU mask
 * 0xf0, the complement of sgpr1_init_regs' 0x0f).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1457
/* EDC SEC/DED error counter registers, read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1486
/* Run the Carrizo EDC GPR workaround.
 *
 * Builds one indirect buffer containing three compute dispatches (one
 * touching the VGPR file, two touching the SGPR file with complementary
 * CU masks), submits it on compute ring 0, waits for completion, then
 * enables EDC via GB_EDC_MODE / CC_GC_EDC_CONFIG and reads back all
 * SEC/DED counter registers to clear them.
 *
 * Returns 0 on success (or when skipped), otherwise a negative error
 * code from IB allocation, submission or fence wait.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save current EDC mode and disable it while the shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per reg pair + 4 (PGM_LO/HI write)
	 * + 5 (dispatch packet) + 2 (CS partial flush), times 4 bytes */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders into the tail of the IB */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 -- reuses the same SGPR shader (same sgpr_offset) with the
	 * complementary CU mask from sgpr2_init_regs */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC on top of the saved mode */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1649
1650 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1651 {
1652         u32 gb_addr_config;
1653         u32 mc_shared_chmap, mc_arb_ramcfg;
1654         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1655         u32 tmp;
1656         int ret;
1657
1658         switch (adev->asic_type) {
1659         case CHIP_TOPAZ:
1660                 adev->gfx.config.max_shader_engines = 1;
1661                 adev->gfx.config.max_tile_pipes = 2;
1662                 adev->gfx.config.max_cu_per_sh = 6;
1663                 adev->gfx.config.max_sh_per_se = 1;
1664                 adev->gfx.config.max_backends_per_se = 2;
1665                 adev->gfx.config.max_texture_channel_caches = 2;
1666                 adev->gfx.config.max_gprs = 256;
1667                 adev->gfx.config.max_gs_threads = 32;
1668                 adev->gfx.config.max_hw_contexts = 8;
1669
1670                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1671                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1672                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1673                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1674                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1675                 break;
1676         case CHIP_FIJI:
1677                 adev->gfx.config.max_shader_engines = 4;
1678                 adev->gfx.config.max_tile_pipes = 16;
1679                 adev->gfx.config.max_cu_per_sh = 16;
1680                 adev->gfx.config.max_sh_per_se = 1;
1681                 adev->gfx.config.max_backends_per_se = 4;
1682                 adev->gfx.config.max_texture_channel_caches = 16;
1683                 adev->gfx.config.max_gprs = 256;
1684                 adev->gfx.config.max_gs_threads = 32;
1685                 adev->gfx.config.max_hw_contexts = 8;
1686
1687                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1688                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1689                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1690                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1691                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1692                 break;
1693         case CHIP_POLARIS11:
1694         case CHIP_POLARIS12:
1695                 ret = amdgpu_atombios_get_gfx_info(adev);
1696                 if (ret)
1697                         return ret;
1698                 adev->gfx.config.max_gprs = 256;
1699                 adev->gfx.config.max_gs_threads = 32;
1700                 adev->gfx.config.max_hw_contexts = 8;
1701
1702                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1703                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1704                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1705                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1706                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1707                 break;
1708         case CHIP_POLARIS10:
1709                 ret = amdgpu_atombios_get_gfx_info(adev);
1710                 if (ret)
1711                         return ret;
1712                 adev->gfx.config.max_gprs = 256;
1713                 adev->gfx.config.max_gs_threads = 32;
1714                 adev->gfx.config.max_hw_contexts = 8;
1715
1716                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1717                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1718                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1719                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1720                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1721                 break;
1722         case CHIP_TONGA:
1723                 adev->gfx.config.max_shader_engines = 4;
1724                 adev->gfx.config.max_tile_pipes = 8;
1725                 adev->gfx.config.max_cu_per_sh = 8;
1726                 adev->gfx.config.max_sh_per_se = 1;
1727                 adev->gfx.config.max_backends_per_se = 2;
1728                 adev->gfx.config.max_texture_channel_caches = 8;
1729                 adev->gfx.config.max_gprs = 256;
1730                 adev->gfx.config.max_gs_threads = 32;
1731                 adev->gfx.config.max_hw_contexts = 8;
1732
1733                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1734                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1735                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1736                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1737                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1738                 break;
1739         case CHIP_CARRIZO:
1740                 adev->gfx.config.max_shader_engines = 1;
1741                 adev->gfx.config.max_tile_pipes = 2;
1742                 adev->gfx.config.max_sh_per_se = 1;
1743                 adev->gfx.config.max_backends_per_se = 2;
1744                 adev->gfx.config.max_cu_per_sh = 8;
1745                 adev->gfx.config.max_texture_channel_caches = 2;
1746                 adev->gfx.config.max_gprs = 256;
1747                 adev->gfx.config.max_gs_threads = 32;
1748                 adev->gfx.config.max_hw_contexts = 8;
1749
1750                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1751                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1752                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1753                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1754                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1755                 break;
1756         case CHIP_STONEY:
1757                 adev->gfx.config.max_shader_engines = 1;
1758                 adev->gfx.config.max_tile_pipes = 2;
1759                 adev->gfx.config.max_sh_per_se = 1;
1760                 adev->gfx.config.max_backends_per_se = 1;
1761                 adev->gfx.config.max_cu_per_sh = 3;
1762                 adev->gfx.config.max_texture_channel_caches = 2;
1763                 adev->gfx.config.max_gprs = 256;
1764                 adev->gfx.config.max_gs_threads = 16;
1765                 adev->gfx.config.max_hw_contexts = 8;
1766
1767                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1768                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1769                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1770                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1771                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1772                 break;
1773         default:
1774                 adev->gfx.config.max_shader_engines = 2;
1775                 adev->gfx.config.max_tile_pipes = 4;
1776                 adev->gfx.config.max_cu_per_sh = 2;
1777                 adev->gfx.config.max_sh_per_se = 1;
1778                 adev->gfx.config.max_backends_per_se = 2;
1779                 adev->gfx.config.max_texture_channel_caches = 4;
1780                 adev->gfx.config.max_gprs = 256;
1781                 adev->gfx.config.max_gs_threads = 32;
1782                 adev->gfx.config.max_hw_contexts = 8;
1783
1784                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1785                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1786                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1787                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1788                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1789                 break;
1790         }
1791
1792         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1793         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1794         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1795
1796         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1797         adev->gfx.config.mem_max_burst_length_bytes = 256;
1798         if (adev->flags & AMD_IS_APU) {
1799                 /* Get memory bank mapping mode. */
1800                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1801                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1802                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1803
1804                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1805                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1806                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1807
1808                 /* Validate settings in case only one DIMM installed. */
1809                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1810                         dimm00_addr_map = 0;
1811                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1812                         dimm01_addr_map = 0;
1813                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1814                         dimm10_addr_map = 0;
1815                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1816                         dimm11_addr_map = 0;
1817
1818                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1819                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1820                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1821                         adev->gfx.config.mem_row_size_in_kb = 2;
1822                 else
1823                         adev->gfx.config.mem_row_size_in_kb = 1;
1824         } else {
1825                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1826                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1827                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1828                         adev->gfx.config.mem_row_size_in_kb = 4;
1829         }
1830
1831         adev->gfx.config.shader_engine_tile_size = 32;
1832         adev->gfx.config.num_gpus = 1;
1833         adev->gfx.config.multi_gpu_tile_size = 64;
1834
1835         /* fix up row size */
1836         switch (adev->gfx.config.mem_row_size_in_kb) {
1837         case 1:
1838         default:
1839                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1840                 break;
1841         case 2:
1842                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1843                 break;
1844         case 4:
1845                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1846                 break;
1847         }
1848         adev->gfx.config.gb_addr_config = gb_addr_config;
1849
1850         return 0;
1851 }
1852
1853 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1854                                         int mec, int pipe, int queue)
1855 {
1856         int r;
1857         unsigned irq_type;
1858         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1859
1860         ring = &adev->gfx.compute_ring[ring_id];
1861
1862         /* mec0 is me1 */
1863         ring->me = mec + 1;
1864         ring->pipe = pipe;
1865         ring->queue = queue;
1866
1867         ring->ring_obj = NULL;
1868         ring->use_doorbell = true;
1869         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1870         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1871                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1872         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1873
1874         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1875                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1876                 + ring->pipe;
1877
1878         /* type-2 packets are deprecated on MEC, use type-3 instead */
1879         r = amdgpu_ring_init(adev, ring, 1024,
1880                         &adev->gfx.eop_irq, irq_type);
1881         if (r)
1882                 return r;
1883
1884
1885         return 0;
1886 }
1887
/* IP-block sw_init callback for GFX8.
 *
 * Registers the KIQ/EOP/privileged-reg/privileged-inst interrupt sources,
 * loads microcode, allocates the RLC and MEC buffer objects, initializes
 * all gfx and compute rings plus the KIQ ring and MQDs, reserves the
 * GDS/GWS/OA partitions, and finally fills the per-ASIC gfx config.
 *
 * Returns 0 on success or the first error encountered.
 * NOTE(review): error paths after gfx_v8_0_rlc_init()/mec_init() return
 * without freeing the earlier BOs here -- presumably sw_fini handles the
 * cleanup; confirm before relying on it.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs depends on the ASIC */
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes
	 * (pipe is the innermost loop so queue N of every pipe is assigned
	 * before queue N+1 of any pipe) */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2038
/*
 * gfx_v8_0_sw_fini - IP-block .sw_fini hook: tear down GFX v8 software state.
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Releases everything allocated by the matching sw_init path, in roughly
 * reverse order of creation: the GDS/GWS/OA reserved kernel BOs, the gfx
 * and compute rings, the per-queue compute MQDs, the KIQ ring and BOs,
 * the MEC HPD buffers, the RLC state, and finally the loaded microcode.
 * The teardown order is deliberate; do not reorder these calls.
 *
 * Returns 0 (the teardown helpers report no recoverable failures here).
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* Free the GDS, GWS and OA kernel BOs reserved for gfx in sw_init. */
	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	/* Tear down all gfx rings, then all compute rings. */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	/* Free compute MQD backing memory, then the KIQ ring and its BOs. */
	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	/* Release MEC HPD buffers, RLC save/restore state and firmware images. */
	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2063
2064 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2065 {
2066         uint32_t *modearray, *mod2array;
2067         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2068         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2069         u32 reg_offset;
2070
2071         modearray = adev->gfx.config.tile_mode_array;
2072         mod2array = adev->gfx.config.macrotile_mode_array;
2073
2074         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2075                 modearray[reg_offset] = 0;
2076
2077         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2078                 mod2array[reg_offset] = 0;
2079
2080         switch (adev->asic_type) {
2081         case CHIP_TOPAZ:
2082                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2083                                 PIPE_CONFIG(ADDR_SURF_P2) |
2084                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2085                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2086                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2087                                 PIPE_CONFIG(ADDR_SURF_P2) |
2088                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2089                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2090                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2091                                 PIPE_CONFIG(ADDR_SURF_P2) |
2092                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2093                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2094                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2095                                 PIPE_CONFIG(ADDR_SURF_P2) |
2096                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2097                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2098                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099                                 PIPE_CONFIG(ADDR_SURF_P2) |
2100                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2101                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2102                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2103                                 PIPE_CONFIG(ADDR_SURF_P2) |
2104                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2105                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2106                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2107                                 PIPE_CONFIG(ADDR_SURF_P2) |
2108                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2109                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2110                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2111                                 PIPE_CONFIG(ADDR_SURF_P2));
2112                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2113                                 PIPE_CONFIG(ADDR_SURF_P2) |
2114                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2115                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2116                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2117                                  PIPE_CONFIG(ADDR_SURF_P2) |
2118                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2119                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2120                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2121                                  PIPE_CONFIG(ADDR_SURF_P2) |
2122                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2123                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2124                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2125                                  PIPE_CONFIG(ADDR_SURF_P2) |
2126                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2127                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2128                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2129                                  PIPE_CONFIG(ADDR_SURF_P2) |
2130                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2131                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2132                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2133                                  PIPE_CONFIG(ADDR_SURF_P2) |
2134                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2135                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2136                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2137                                  PIPE_CONFIG(ADDR_SURF_P2) |
2138                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2139                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2140                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2141                                  PIPE_CONFIG(ADDR_SURF_P2) |
2142                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2143                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2144                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2145                                  PIPE_CONFIG(ADDR_SURF_P2) |
2146                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2147                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2148                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2149                                  PIPE_CONFIG(ADDR_SURF_P2) |
2150                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2151                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2152                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2153                                  PIPE_CONFIG(ADDR_SURF_P2) |
2154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2156                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2157                                  PIPE_CONFIG(ADDR_SURF_P2) |
2158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2160                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2161                                  PIPE_CONFIG(ADDR_SURF_P2) |
2162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2164                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2165                                  PIPE_CONFIG(ADDR_SURF_P2) |
2166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2168                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2169                                  PIPE_CONFIG(ADDR_SURF_P2) |
2170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2172                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2173                                  PIPE_CONFIG(ADDR_SURF_P2) |
2174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2176                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2177                                  PIPE_CONFIG(ADDR_SURF_P2) |
2178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2180                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2181                                  PIPE_CONFIG(ADDR_SURF_P2) |
2182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2184
2185                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2186                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2187                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2188                                 NUM_BANKS(ADDR_SURF_8_BANK));
2189                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2190                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2191                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2192                                 NUM_BANKS(ADDR_SURF_8_BANK));
2193                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2194                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2195                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2196                                 NUM_BANKS(ADDR_SURF_8_BANK));
2197                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2199                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2200                                 NUM_BANKS(ADDR_SURF_8_BANK));
2201                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2202                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2203                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2204                                 NUM_BANKS(ADDR_SURF_8_BANK));
2205                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2206                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2207                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2208                                 NUM_BANKS(ADDR_SURF_8_BANK));
2209                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2210                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2211                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2212                                 NUM_BANKS(ADDR_SURF_8_BANK));
2213                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2214                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2215                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2216                                 NUM_BANKS(ADDR_SURF_16_BANK));
2217                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2218                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2219                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2220                                 NUM_BANKS(ADDR_SURF_16_BANK));
2221                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2222                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2224                                  NUM_BANKS(ADDR_SURF_16_BANK));
2225                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2226                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2227                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2228                                  NUM_BANKS(ADDR_SURF_16_BANK));
2229                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2230                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2231                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2232                                  NUM_BANKS(ADDR_SURF_16_BANK));
2233                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2235                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2236                                  NUM_BANKS(ADDR_SURF_16_BANK));
2237                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2239                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240                                  NUM_BANKS(ADDR_SURF_8_BANK));
2241
2242                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2243                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2244                             reg_offset != 23)
2245                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2246
2247                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2248                         if (reg_offset != 7)
2249                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2250
2251                 break;
2252         case CHIP_FIJI:
2253                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2255                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2256                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2257                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2259                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2260                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2261                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2262                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2263                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2264                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2265                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2266                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2267                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2269                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2271                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2272                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2273                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2274                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2275                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2276                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2277                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2278                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2279                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2280                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2281                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2282                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2283                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2284                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2285                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2286                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2287                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2288                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2289                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2290                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2291                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2295                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2296                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2299                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2300                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2303                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2304                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2307                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2311                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2312                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2315                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2316                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2319                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2320                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2323                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2324                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2327                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2328                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2331                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2332                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2335                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2336                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2339                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2340                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2343                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2344                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2347                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2348                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2351                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2352                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2355                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2356                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2359                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2360                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2364                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2368                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2371                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2372                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2375
2376                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2378                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2379                                 NUM_BANKS(ADDR_SURF_8_BANK));
2380                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2382                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2383                                 NUM_BANKS(ADDR_SURF_8_BANK));
2384                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2386                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2387                                 NUM_BANKS(ADDR_SURF_8_BANK));
2388                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2390                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2391                                 NUM_BANKS(ADDR_SURF_8_BANK));
2392                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2394                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2395                                 NUM_BANKS(ADDR_SURF_8_BANK));
2396                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2398                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2399                                 NUM_BANKS(ADDR_SURF_8_BANK));
2400                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2402                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2403                                 NUM_BANKS(ADDR_SURF_8_BANK));
2404                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2406                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2407                                 NUM_BANKS(ADDR_SURF_8_BANK));
2408                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2410                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2411                                 NUM_BANKS(ADDR_SURF_8_BANK));
2412                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2414                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2415                                  NUM_BANKS(ADDR_SURF_8_BANK));
2416                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2418                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2419                                  NUM_BANKS(ADDR_SURF_8_BANK));
2420                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2422                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2423                                  NUM_BANKS(ADDR_SURF_8_BANK));
2424                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2426                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2427                                  NUM_BANKS(ADDR_SURF_8_BANK));
2428                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2430                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2431                                  NUM_BANKS(ADDR_SURF_4_BANK));
2432
2433                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2434                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2435
2436                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2437                         if (reg_offset != 7)
2438                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2439
2440                 break;
        /*
         * Tonga: program the 31 GB_TILE_MODE registers and the
         * GB_MACROTILE_MODE registers with the addrlib-derived surface
         * tiling parameters for this ASIC.  Most modes use the 8-pipe
         * ADDR_SURF_P8_32x32_16x16 pipe config; the PRT fallback entries
         * (7, 12, 17, 23, 30) use ADDR_SURF_P4_16x16 instead.
         */
        case CHIP_TONGA:
                /* Depth/stencil modes 0-7: 2D/1D/PRT thin tiling at
                 * increasing tile-split sizes (64B .. 2KB). */
                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                /* Mode 8: linear-aligned (no micro tiling). */
                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
                /* Display modes 9-12. */
                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                /* Thin (color) modes 13-18. */
                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                /* Thick (3D) modes 19-26. */
                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                /* Rotated modes 27-30. */
                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

                /*
                 * Macrotile (bank) parameters.  mod2array[7] is
                 * intentionally left unassigned: the write loop below
                 * skips register offset 7 (GB_MACROTILE_MODE7 is
                 * presumably reserved on this family — note indices
                 * jump from 6 to 8 here).
                 */
                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                 NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                 NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                 NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                 NUM_BANKS(ADDR_SURF_4_BANK));
                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                 NUM_BANKS(ADDR_SURF_4_BANK));

                /* Write every tile mode register. */
                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

                /* Write the macrotile registers, skipping offset 7. */
                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
                        if (reg_offset != 7)
                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

                break;
        /*
         * Polaris11/Polaris12: same table layout as the other VI parts,
         * but every mode uses the 4-pipe ADDR_SURF_P4_16x16 pipe config
         * (smaller parts, fewer pipes than Tonga/Polaris10).
         */
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                /* Depth/stencil modes 0-7 at increasing tile-split sizes. */
                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                /* Mode 8: linear-aligned (no micro tiling). */
                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16));
                /* Display modes 9-12. */
                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                /* Thin (color) modes 13-18. */
                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                /* Thick (3D) modes 19-26. */
                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                /* Rotated modes 27-30. */
                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

                /*
                 * Macrotile (bank) parameters.  As in the other cases,
                 * mod2array[7] is intentionally left unassigned — the
                 * write loop below skips register offset 7 (indices
                 * jump from 6 to 8 here).
                 */
                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));

                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_4_BANK));

                /* Write every tile mode register. */
                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

                /* Write the macrotile registers, skipping offset 7. */
                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
                        if (reg_offset != 7)
                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

                break;
2833         case CHIP_POLARIS10:
2834                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2835                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2836                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2837                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2838                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2840                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2841                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2842                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2843                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2844                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2845                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2846                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2847                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2848                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2849                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2850                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2851                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2852                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2853                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2855                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2856                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2857                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2858                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2859                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2860                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2861                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2862                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2863                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2864                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2866                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2867                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2868                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2869                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2870                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2871                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2872                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2874                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2875                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2876                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2877                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2878                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2880                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2881                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2882                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2883                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2884                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2885                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2886                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2887                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2888                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2891                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2892                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2893                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2895                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2896                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2897                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2899                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2900                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2901                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2903                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2904                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2905                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2907                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2908                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2909                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2911                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2912                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2913                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2915                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2916                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2917                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2919                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2920                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2921                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2923                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2924                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2925                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2927                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2928                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2929                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2930                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2931                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2932                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2933                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2934                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2935                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2936                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2937                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2939                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2940                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2943                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2945                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2946                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2947                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2948                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2949                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2950                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2951                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2952                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2953                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2954                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2955                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2956
2957                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2958                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2959                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2960                                 NUM_BANKS(ADDR_SURF_16_BANK));
2961
2962                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2963                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2964                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965                                 NUM_BANKS(ADDR_SURF_16_BANK));
2966
2967                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2969                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970                                 NUM_BANKS(ADDR_SURF_16_BANK));
2971
2972                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2974                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975                                 NUM_BANKS(ADDR_SURF_16_BANK));
2976
2977                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2978                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2979                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2980                                 NUM_BANKS(ADDR_SURF_16_BANK));
2981
2982                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2983                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2984                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2985                                 NUM_BANKS(ADDR_SURF_16_BANK));
2986
2987                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2990                                 NUM_BANKS(ADDR_SURF_16_BANK));
2991
2992                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2994                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995                                 NUM_BANKS(ADDR_SURF_16_BANK));
2996
2997                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2999                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3000                                 NUM_BANKS(ADDR_SURF_16_BANK));
3001
3002                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3003                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3004                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3005                                 NUM_BANKS(ADDR_SURF_16_BANK));
3006
3007                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3008                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3009                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3010                                 NUM_BANKS(ADDR_SURF_16_BANK));
3011
3012                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3015                                 NUM_BANKS(ADDR_SURF_8_BANK));
3016
3017                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3018                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3019                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3020                                 NUM_BANKS(ADDR_SURF_4_BANK));
3021
3022                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3023                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3024                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3025                                 NUM_BANKS(ADDR_SURF_4_BANK));
3026
3027                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3028                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3029
3030                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3031                         if (reg_offset != 7)
3032                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3033
3034                 break;
3035         case CHIP_STONEY:
3036                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3037                                 PIPE_CONFIG(ADDR_SURF_P2) |
3038                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3039                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3040                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3041                                 PIPE_CONFIG(ADDR_SURF_P2) |
3042                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3043                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3044                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3045                                 PIPE_CONFIG(ADDR_SURF_P2) |
3046                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3047                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3048                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3049                                 PIPE_CONFIG(ADDR_SURF_P2) |
3050                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3051                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3052                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3053                                 PIPE_CONFIG(ADDR_SURF_P2) |
3054                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3055                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3056                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3057                                 PIPE_CONFIG(ADDR_SURF_P2) |
3058                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3059                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3060                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3061                                 PIPE_CONFIG(ADDR_SURF_P2) |
3062                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3063                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3064                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3065                                 PIPE_CONFIG(ADDR_SURF_P2));
3066                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3067                                 PIPE_CONFIG(ADDR_SURF_P2) |
3068                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3069                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3070                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3071                                  PIPE_CONFIG(ADDR_SURF_P2) |
3072                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3073                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3074                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3075                                  PIPE_CONFIG(ADDR_SURF_P2) |
3076                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3077                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3078                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3079                                  PIPE_CONFIG(ADDR_SURF_P2) |
3080                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3081                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3082                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3083                                  PIPE_CONFIG(ADDR_SURF_P2) |
3084                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3085                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3086                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3087                                  PIPE_CONFIG(ADDR_SURF_P2) |
3088                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3089                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3090                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3091                                  PIPE_CONFIG(ADDR_SURF_P2) |
3092                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3093                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3094                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3095                                  PIPE_CONFIG(ADDR_SURF_P2) |
3096                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3097                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3098                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3099                                  PIPE_CONFIG(ADDR_SURF_P2) |
3100                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3101                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3102                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3103                                  PIPE_CONFIG(ADDR_SURF_P2) |
3104                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3105                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3106                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3107                                  PIPE_CONFIG(ADDR_SURF_P2) |
3108                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3109                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3110                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3111                                  PIPE_CONFIG(ADDR_SURF_P2) |
3112                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3113                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3114                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3115                                  PIPE_CONFIG(ADDR_SURF_P2) |
3116                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3117                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3118                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3119                                  PIPE_CONFIG(ADDR_SURF_P2) |
3120                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3121                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3122                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3123                                  PIPE_CONFIG(ADDR_SURF_P2) |
3124                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3125                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3126                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3127                                  PIPE_CONFIG(ADDR_SURF_P2) |
3128                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3129                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3130                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3131                                  PIPE_CONFIG(ADDR_SURF_P2) |
3132                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3133                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3134                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3135                                  PIPE_CONFIG(ADDR_SURF_P2) |
3136                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3137                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3138
3139                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3140                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3141                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3142                                 NUM_BANKS(ADDR_SURF_8_BANK));
3143                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3145                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3146                                 NUM_BANKS(ADDR_SURF_8_BANK));
3147                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150                                 NUM_BANKS(ADDR_SURF_8_BANK));
3151                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3152                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3153                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3154                                 NUM_BANKS(ADDR_SURF_8_BANK));
3155                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3156                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3157                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3158                                 NUM_BANKS(ADDR_SURF_8_BANK));
3159                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3162                                 NUM_BANKS(ADDR_SURF_8_BANK));
3163                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3164                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3165                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3166                                 NUM_BANKS(ADDR_SURF_8_BANK));
3167                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3168                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3169                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3170                                 NUM_BANKS(ADDR_SURF_16_BANK));
3171                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3172                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3173                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3174                                 NUM_BANKS(ADDR_SURF_16_BANK));
3175                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3176                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3177                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3178                                  NUM_BANKS(ADDR_SURF_16_BANK));
3179                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3180                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3181                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3182                                  NUM_BANKS(ADDR_SURF_16_BANK));
3183                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3185                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3186                                  NUM_BANKS(ADDR_SURF_16_BANK));
3187                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3188                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3189                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3190                                  NUM_BANKS(ADDR_SURF_16_BANK));
3191                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3192                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3193                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3194                                  NUM_BANKS(ADDR_SURF_8_BANK));
3195
3196                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3197                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3198                             reg_offset != 23)
3199                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3200
3201                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3202                         if (reg_offset != 7)
3203                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3204
3205                 break;
3206         default:
3207                 dev_warn(adev->dev,
3208                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3209                          adev->asic_type);
3210
3211         case CHIP_CARRIZO:
3212                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3213                                 PIPE_CONFIG(ADDR_SURF_P2) |
3214                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3215                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3216                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3217                                 PIPE_CONFIG(ADDR_SURF_P2) |
3218                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3219                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3220                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3221                                 PIPE_CONFIG(ADDR_SURF_P2) |
3222                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3223                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3224                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3225                                 PIPE_CONFIG(ADDR_SURF_P2) |
3226                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3227                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3228                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3229                                 PIPE_CONFIG(ADDR_SURF_P2) |
3230                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3231                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3232                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3233                                 PIPE_CONFIG(ADDR_SURF_P2) |
3234                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3235                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3236                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3237                                 PIPE_CONFIG(ADDR_SURF_P2) |
3238                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3239                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3240                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3241                                 PIPE_CONFIG(ADDR_SURF_P2));
3242                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3243                                 PIPE_CONFIG(ADDR_SURF_P2) |
3244                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3246                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3247                                  PIPE_CONFIG(ADDR_SURF_P2) |
3248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3250                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3251                                  PIPE_CONFIG(ADDR_SURF_P2) |
3252                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3253                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3254                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3255                                  PIPE_CONFIG(ADDR_SURF_P2) |
3256                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3257                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259                                  PIPE_CONFIG(ADDR_SURF_P2) |
3260                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3261                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3262                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3263                                  PIPE_CONFIG(ADDR_SURF_P2) |
3264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3266                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3267                                  PIPE_CONFIG(ADDR_SURF_P2) |
3268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3270                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3271                                  PIPE_CONFIG(ADDR_SURF_P2) |
3272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3274                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3275                                  PIPE_CONFIG(ADDR_SURF_P2) |
3276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3278                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3279                                  PIPE_CONFIG(ADDR_SURF_P2) |
3280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3282                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3283                                  PIPE_CONFIG(ADDR_SURF_P2) |
3284                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3285                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3286                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3287                                  PIPE_CONFIG(ADDR_SURF_P2) |
3288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3290                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3291                                  PIPE_CONFIG(ADDR_SURF_P2) |
3292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3294                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3295                                  PIPE_CONFIG(ADDR_SURF_P2) |
3296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3298                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3299                                  PIPE_CONFIG(ADDR_SURF_P2) |
3300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3302                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3303                                  PIPE_CONFIG(ADDR_SURF_P2) |
3304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3306                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3307                                  PIPE_CONFIG(ADDR_SURF_P2) |
3308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3310                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3311                                  PIPE_CONFIG(ADDR_SURF_P2) |
3312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3314
3315                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3316                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3317                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3318                                 NUM_BANKS(ADDR_SURF_8_BANK));
3319                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3320                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3321                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3322                                 NUM_BANKS(ADDR_SURF_8_BANK));
3323                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3325                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3326                                 NUM_BANKS(ADDR_SURF_8_BANK));
3327                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3328                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3329                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3330                                 NUM_BANKS(ADDR_SURF_8_BANK));
3331                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3332                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3333                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3334                                 NUM_BANKS(ADDR_SURF_8_BANK));
3335                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3336                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3337                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3338                                 NUM_BANKS(ADDR_SURF_8_BANK));
3339                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3340                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3341                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3342                                 NUM_BANKS(ADDR_SURF_8_BANK));
3343                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3344                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3345                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3346                                 NUM_BANKS(ADDR_SURF_16_BANK));
3347                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3348                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3349                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3350                                 NUM_BANKS(ADDR_SURF_16_BANK));
3351                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3352                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3353                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3354                                  NUM_BANKS(ADDR_SURF_16_BANK));
3355                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3356                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3357                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3358                                  NUM_BANKS(ADDR_SURF_16_BANK));
3359                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3360                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3361                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3362                                  NUM_BANKS(ADDR_SURF_16_BANK));
3363                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3364                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3365                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3366                                  NUM_BANKS(ADDR_SURF_16_BANK));
3367                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3368                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3369                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3370                                  NUM_BANKS(ADDR_SURF_8_BANK));
3371
3372                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3373                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3374                             reg_offset != 23)
3375                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3376
3377                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3378                         if (reg_offset != 7)
3379                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3380
3381                 break;
3382         }
3383 }
3384
3385 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3386                                   u32 se_num, u32 sh_num, u32 instance)
3387 {
3388         u32 data;
3389
3390         if (instance == 0xffffffff)
3391                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3392         else
3393                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3394
3395         if (se_num == 0xffffffff)
3396                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3397         else
3398                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3399
3400         if (sh_num == 0xffffffff)
3401                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3402         else
3403                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3404
3405         WREG32(mmGRBM_GFX_INDEX, data);
3406 }
3407
3408 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3409 {
3410         u32 data, mask;
3411
3412         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3413                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3414
3415         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3416
3417         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3418                                          adev->gfx.config.max_sh_per_se);
3419
3420         return (~data) & mask;
3421 }
3422
/*
 * gfx_v8_0_raster_config - get the default raster config for an ASIC
 *
 * ORs the per-ASIC default PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1
 * field values into *rconf / *rconf1.  The values encode the RB/packer/
 * shader-engine mapping for the fully-enabled configuration; harvested
 * parts are patched up later by gfx_v8_0_write_harvested_raster_configs().
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		/* hardware default (all-zero) raster config is used */
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3462
/*
 * gfx_v8_0_write_harvested_raster_configs - program raster config for
 * parts with harvested (disabled) render backends
 *
 * @raster_config / @raster_config_1: the default (unharvested) values
 * @rb_mask: bitmap of RBs that are actually enabled
 * @num_rb: number of RB pipes the default config assumes
 *
 * For each shader engine, remaps the SE/PKR/RB mapping fields so that
 * work is steered away from disabled RBs, then writes the per-SE
 * PA_SC_RASTER_CONFIG (and the shared PA_SC_RASTER_CONFIG_1).
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* per-SE slice of the global RB enable bitmap */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* with >2 SEs, if a whole SE pair is dark, point SE_PAIR_MAP at
	 * the surviving pair */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* if one SE of this pair is fully disabled, remap SE_MAP
		 * to the live one */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* if a whole packer is dark, remap PKR_MAP to the live one */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* within each packer, remap RB_MAP to whichever RB survived */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3571
/*
 * gfx_v8_0_setup_rb - discover active render backends and program the
 * raster configuration
 *
 * Walks every SE/SH to build the global active-RB bitmap, programs
 * PA_SC_RASTER_CONFIG(_1) (via the harvested path when some RBs are
 * disabled), and caches the per-SE/SH RB registers for userspace
 * queries.  Caller-visible state: adev->gfx.config.backend_enable_mask,
 * num_rbs and rb_config[][] are updated.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	/* grbm_idx_mutex serializes GRBM_GFX_INDEX steering */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* pack each SH's bitmap into its slot of the
			 * global bitmap */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* use the default config unless some RBs were harvested away */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3628
/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMID apertures
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM registers (config, bases, APE1) for the
 * compute VMIDs (8-15).
 */
#define DEFAULT_SH_MEM_BASES    (0x6000)
#define FIRST_COMPUTE_VMID      (8)
#define LAST_COMPUTE_VMID       (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* same base for both the shared and private apertures */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* HSA64 addressing, unaligned access allowed, cache-coherent
	 * default MTYPE, private memory goes through the ATC */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base > limit leaves that aperture unused */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* restore VMID 0 selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3673
3674 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3675 {
3676         switch (adev->asic_type) {
3677         default:
3678                 adev->gfx.config.double_offchip_lds_buf = 1;
3679                 break;
3680         case CHIP_CARRIZO:
3681         case CHIP_STONEY:
3682                 adev->gfx.config.double_offchip_lds_buf = 0;
3683                 break;
3684         }
3685 }
3686
/*
 * gfx_v8_0_gpu_init - one-time GFX block initialization
 *
 * Programs the address config, tiling tables and RB setup, then
 * initializes the per-VMID SH_MEM registers, the compute VMIDs, and
 * the broadcast SC FIFO sizes / SPI arbitration priorities.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: kernel context, uncached default MTYPE */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* user VMIDs: non-coherent default MTYPE, bases
			 * point at the shared aperture */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* APE1 base > limit leaves that aperture unused */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3767
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes to go idle
 *
 * Polls the per-CU serdes master busy bit on every SE/SH, then the
 * non-CU (SE/GC/TC) masters, giving each up to adev->usec_timeout
 * microseconds.  Timeouts are not reported; the poll simply stops.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3797
3798 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3799                                                bool enable)
3800 {
3801         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3802
3803         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3804         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3805         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3806         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3807
3808         WREG32(mmCP_INT_CNTL_RING0, tmp);
3809 }
3810
/* Point the RLC at the clear-state indirect buffer (CSIB): 64-bit GPU
 * address split across HI/LO (LO forced to dword alignment) plus its
 * length.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3821
/*
 * gfx_v8_0_parse_ind_reg_list - parse the RLC indirect register list
 *
 * Scans register_list_format[] from ind_offset to list_size.  Entries
 * are separated by 0xFFFFFFFF markers; the dword offset where each
 * entry starts is recorded in ind_start_offsets[].  Within an entry,
 * records are consumed three dwords at a time and the third dword is
 * treated as an index value: it is deduplicated into unique_indices[]
 * and rewritten IN PLACE with its position in that table.
 *
 * @register_list_format: list to parse; index dwords are rewritten
 * @indices_count/@offset_count: in/out element counts for the two
 *      output arrays; BUG()s if either array would overflow
 *      (note: the checks fire after the store/increment, so the last
 *      valid slot also trips them - capacity is effectively max - 1)
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			/* record where this entry begins */
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			/* entry terminator; next dword starts a new entry */
			new_entry = true;
			continue;
		}

		/* skip the first two dwords of the record; the third is
		 * the index value (loop increment advances past it) */
		ind_offset += 2;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* first time we see this index value */
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw index value with its table position */
		register_list_format[ind_offset] = indices;
	}
}
3871
3872 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3873 {
3874         int i, temp, data;
3875         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3876         int indices_count = 0;
3877         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3878         int offset_count = 0;
3879
3880         int list_size;
3881         unsigned int *register_list_format =
3882                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3883         if (!register_list_format)
3884                 return -ENOMEM;
3885         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3886                         adev->gfx.rlc.reg_list_format_size_bytes);
3887
3888         gfx_v8_0_parse_ind_reg_list(register_list_format,
3889                                 RLC_FormatDirectRegListLength,
3890                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3891                                 unique_indices,
3892                                 &indices_count,
3893                                 sizeof(unique_indices) / sizeof(int),
3894                                 indirect_start_offsets,
3895                                 &offset_count,
3896                                 sizeof(indirect_start_offsets)/sizeof(int));
3897
3898         /* save and restore list */
3899         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3900
3901         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3902         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3903                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3904
3905         /* indirect list */
3906         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3907         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3908                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3909
3910         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3911         list_size = list_size >> 1;
3912         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3913         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3914
3915         /* starting offsets starts */
3916         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3917                 adev->gfx.rlc.starting_offsets_start);
3918         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3919                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3920                                 indirect_start_offsets[i]);
3921
3922         /* unique indices */
3923         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3924         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3925         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3926                 if (unique_indices[i] != 0) {
3927                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3928                         WREG32(data + i, unique_indices[i] >> 20);
3929                 }
3930         }
3931         kfree(register_list_format);
3932
3933         return 0;
3934 }
3935
/* Turn on the RLC save/restore machine (lists must be uploaded first,
 * see gfx_v8_0_init_save_restore_list()). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3940
/*
 * gfx_v8_0_init_power_gating - program static powergating parameters
 *
 * Sets the WPTR idle poll count, the RLC powergating up/down/command/
 * memory-sleep delays, the serdes command delay, and the GFX idle
 * threshold after which the RLC saves GRBM registers.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
3957
3958 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3959                                                 bool enable)
3960 {
3961         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3962 }
3963
3964 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3965                                                   bool enable)
3966 {
3967         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3968 }
3969
3970 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3971 {
3972         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
3973 }
3974
3975 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3976 {
3977         if ((adev->asic_type == CHIP_CARRIZO) ||
3978             (adev->asic_type == CHIP_STONEY)) {
3979                 gfx_v8_0_init_csb(adev);
3980                 gfx_v8_0_init_save_restore_list(adev);
3981                 gfx_v8_0_enable_save_restore_machine(adev);
3982                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3983                 gfx_v8_0_init_power_gating(adev);
3984                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3985         } else if ((adev->asic_type == CHIP_POLARIS11) ||
3986                    (adev->asic_type == CHIP_POLARIS12)) {
3987                 gfx_v8_0_init_csb(adev);
3988                 gfx_v8_0_init_save_restore_list(adev);
3989                 gfx_v8_0_enable_save_restore_machine(adev);
3990                 gfx_v8_0_init_power_gating(adev);
3991         }
3992
3993 }
3994
/* Halt the RLC: stop its F32 core, mask the GUI idle interrupts it
 * drives, then wait for the serdes to drain. */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4002
/* Pulse the RLC soft reset through GRBM, with settle delays around
 * both edges. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4011
/* Start the RLC F32 core.  On dGPUs the GUI idle interrupt is enabled
 * here; APUs defer it until after the CP is initialized. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4022
4023 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4024 {
4025         const struct rlc_firmware_header_v2_0 *hdr;
4026         const __le32 *fw_data;
4027         unsigned i, fw_size;
4028
4029         if (!adev->gfx.rlc_fw)
4030                 return -EINVAL;
4031
4032         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4033         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4034
4035         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4036                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4037         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4038
4039         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4040         for (i = 0; i < fw_size; i++)
4041                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4042         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4043
4044         return 0;
4045 }
4046
/* Full RLC bring-up sequence: stop the RLC, disable coarse/light clock
 * gating, soft-reset, re-init power gating, (re)load the firmware if the
 * powerplay/SMU path is not handling it, then start the RLC.  The order
 * of these steps is hardware-mandated. */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
        int r;
        u32 tmp;

        gfx_v8_0_rlc_stop(adev);

        /* disable CG */
        tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
        tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
        WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
        if (adev->asic_type == CHIP_POLARIS11 ||
            adev->asic_type == CHIP_POLARIS10 ||
            adev->asic_type == CHIP_POLARIS12) {
                /* Polaris also gates the 3D pipe; clear the two low
                 * enable bits there as well (presumably CGCG/CGLS --
                 * mirrors the masks used above). */
                tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
                tmp &= ~0x3;
                WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
        }

        /* disable PG */
        WREG32(mmRLC_PG_CNTL, 0);

        gfx_v8_0_rlc_reset(adev);
        gfx_v8_0_init_pg(adev);

        if (!adev->pp_enabled) {
                if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
                        /* legacy rlc firmware loading */
                        r = gfx_v8_0_rlc_load_microcode(adev);
                        if (r)
                                return r;
                } else {
                        /* SMU loads the ucode; just verify it finished */
                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
                                                        AMDGPU_UCODE_ID_RLC_G);
                        if (r)
                                return -EINVAL;
                }
        }

        gfx_v8_0_rlc_start(adev);

        return 0;
}
4091
4092 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4093 {
4094         int i;
4095         u32 tmp = RREG32(mmCP_ME_CNTL);
4096
4097         if (enable) {
4098                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4099                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4100                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4101         } else {
4102                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4103                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4104                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4105                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4106                         adev->gfx.gfx_ring[i].ready = false;
4107         }
4108         WREG32(mmCP_ME_CNTL, tmp);
4109         udelay(50);
4110 }
4111
4112 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4113 {
4114         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4115         const struct gfx_firmware_header_v1_0 *ce_hdr;
4116         const struct gfx_firmware_header_v1_0 *me_hdr;
4117         const __le32 *fw_data;
4118         unsigned i, fw_size;
4119
4120         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4121                 return -EINVAL;
4122
4123         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4124                 adev->gfx.pfp_fw->data;
4125         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4126                 adev->gfx.ce_fw->data;
4127         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4128                 adev->gfx.me_fw->data;
4129
4130         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4131         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4132         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4133
4134         gfx_v8_0_cp_gfx_enable(adev, false);
4135
4136         /* PFP */
4137         fw_data = (const __le32 *)
4138                 (adev->gfx.pfp_fw->data +
4139                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4140         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4141         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4142         for (i = 0; i < fw_size; i++)
4143                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4144         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4145
4146         /* CE */
4147         fw_data = (const __le32 *)
4148                 (adev->gfx.ce_fw->data +
4149                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4150         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4151         WREG32(mmCP_CE_UCODE_ADDR, 0);
4152         for (i = 0; i < fw_size; i++)
4153                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4154         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4155
4156         /* ME */
4157         fw_data = (const __le32 *)
4158                 (adev->gfx.me_fw->data +
4159                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4160         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4161         WREG32(mmCP_ME_RAM_WADDR, 0);
4162         for (i = 0; i < fw_size; i++)
4163                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4164         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4165
4166         return 0;
4167 }
4168
4169 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4170 {
4171         u32 count = 0;
4172         const struct cs_section_def *sect = NULL;
4173         const struct cs_extent_def *ext = NULL;
4174
4175         /* begin clear state */
4176         count += 2;
4177         /* context control state */
4178         count += 3;
4179
4180         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4181                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4182                         if (sect->id == SECT_CONTEXT)
4183                                 count += 2 + ext->reg_count;
4184                         else
4185                                 return 0;
4186                 }
4187         }
4188         /* pa_sc_raster_config/pa_sc_raster_config1 */
4189         count += 4;
4190         /* end clear state */
4191         count += 2;
4192         /* clear state */
4193         count += 2;
4194
4195         return count;
4196 }
4197
/* Initialize the gfx CP and emit the clear-state preamble on gfx ring 0:
 * begin-clear-state, context control, every SECT_CONTEXT register extent
 * from vi_cs_data, the per-ASIC PA_SC_RASTER_CONFIG pair, end-clear-state,
 * CLEAR_STATE, and finally the CE partition bases.  The stream size must
 * match gfx_v8_0_get_csb_size(). */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;
        int r, i;

        /* init the CP */
        WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
        WREG32(mmCP_ENDIAN_SWAP, 0);
        WREG32(mmCP_DEVICE_ID, 1);

        gfx_v8_0_cp_gfx_enable(adev, true);

        /* +4 dwords for the SET_BASE packet appended below */
        r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* clear state buffer */
        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, 0x80000000);
        amdgpu_ring_write(ring, 0x80000000);

        /* golden context register state from the clearstate tables */
        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                amdgpu_ring_write(ring,
                                       PACKET3(PACKET3_SET_CONTEXT_REG,
                                               ext->reg_count));
                                amdgpu_ring_write(ring,
                                       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        amdgpu_ring_write(ring, ext->extent[i]);
                        }
                }
        }

        /* per-ASIC PA_SC_RASTER_CONFIG/_1 values (RB/SE layout dependent) */
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
        switch (adev->asic_type) {
        case CHIP_TONGA:
        case CHIP_POLARIS10:
                amdgpu_ring_write(ring, 0x16000012);
                amdgpu_ring_write(ring, 0x0000002A);
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                amdgpu_ring_write(ring, 0x16000012);
                amdgpu_ring_write(ring, 0x00000000);
                break;
        case CHIP_FIJI:
                amdgpu_ring_write(ring, 0x3a00161a);
                amdgpu_ring_write(ring, 0x0000002e);
                break;
        case CHIP_CARRIZO:
                amdgpu_ring_write(ring, 0x00000002);
                amdgpu_ring_write(ring, 0x00000000);
                break;
        case CHIP_TOPAZ:
                amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
                                0x00000000 : 0x00000002);
                amdgpu_ring_write(ring, 0x00000000);
                break;
        case CHIP_STONEY:
                amdgpu_ring_write(ring, 0x00000000);
                amdgpu_ring_write(ring, 0x00000000);
                break;
        default:
                BUG();
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        amdgpu_ring_write(ring, 0);

        /* init the CE partitions */
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
        amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
        amdgpu_ring_write(ring, 0x8000);
        amdgpu_ring_write(ring, 0x8000);

        amdgpu_ring_commit(ring);

        return 0;
}
4290 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4291 {
4292         u32 tmp;
4293         /* no gfx doorbells on iceland */
4294         if (adev->asic_type == CHIP_TOPAZ)
4295                 return;
4296
4297         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4298
4299         if (ring->use_doorbell) {
4300                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4301                                 DOORBELL_OFFSET, ring->doorbell_index);
4302                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4303                                                 DOORBELL_HIT, 0);
4304                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4305                                             DOORBELL_EN, 1);
4306         } else {
4307                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4308         }
4309
4310         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4311
4312         if (adev->flags & AMD_IS_APU)
4313                 return;
4314
4315         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4316                                         DOORBELL_RANGE_LOWER,
4317                                         AMDGPU_DOORBELL_GFX_RING0);
4318         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4319
4320         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4321                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4322 }
4323
/* Program the gfx ring-buffer registers (size, pointers, writeback
 * addresses, base, doorbell), then start the ring via the clear-state
 * stream and run a ring test.  The register write order follows the
 * hardware init sequence and must not be rearranged. */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        u32 tmp;
        u32 rb_bufsz;
        u64 rb_addr, rptr_addr, wptr_gpu_addr;
        int r;

        /* Set the write pointer delay */
        WREG32(mmCP_RB_WPTR_DELAY, 0);

        /* set the RB to use vmid 0 */
        WREG32(mmCP_RB_VMID, 0);

        /* Set ring buffer size */
        ring = &adev->gfx.gfx_ring[0];
        rb_bufsz = order_base_2(ring->ring_size / 8);
        tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
        WREG32(mmCP_RB0_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers;
         * RB_RPTR_WR_ENA is set temporarily so the rptr can be reset. */
        WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
        ring->wptr = 0;
        WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

        /* set the wb address wether it's enabled or not */
        rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
        WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

        wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
        WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
        mdelay(1);
        /* restore CNTL without RB_RPTR_WR_ENA */
        WREG32(mmCP_RB0_CNTL, tmp);

        /* ring base is a 256-byte aligned GPU address */
        rb_addr = ring->gpu_addr >> 8;
        WREG32(mmCP_RB0_BASE, rb_addr);
        WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

        gfx_v8_0_set_cpg_door_bell(adev, ring);
        /* start the ring */
        amdgpu_ring_clear_ring(ring);
        gfx_v8_0_cp_gfx_start(adev);
        ring->ready = true;
        r = amdgpu_ring_test_ring(ring);
        if (r)
                ring->ready = false;

        return r;
}
4381
4382 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4383 {
4384         int i;
4385
4386         if (enable) {
4387                 WREG32(mmCP_MEC_CNTL, 0);
4388         } else {
4389                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4390                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4391                         adev->gfx.compute_ring[i].ready = false;
4392                 adev->gfx.kiq.ring.ready = false;
4393         }
4394         udelay(50);
4395 }
4396
4397 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4398 {
4399         const struct gfx_firmware_header_v1_0 *mec_hdr;
4400         const __le32 *fw_data;
4401         unsigned i, fw_size;
4402
4403         if (!adev->gfx.mec_fw)
4404                 return -EINVAL;
4405
4406         gfx_v8_0_cp_compute_enable(adev, false);
4407
4408         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4409         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4410
4411         fw_data = (const __le32 *)
4412                 (adev->gfx.mec_fw->data +
4413                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4414         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4415
4416         /* MEC1 */
4417         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4418         for (i = 0; i < fw_size; i++)
4419                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4420         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4421
4422         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4423         if (adev->gfx.mec2_fw) {
4424                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4425
4426                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4427                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4428
4429                 fw_data = (const __le32 *)
4430                         (adev->gfx.mec2_fw->data +
4431                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4432                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4433
4434                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4435                 for (i = 0; i < fw_size; i++)
4436                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4437                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4438         }
4439
4440         return 0;
4441 }
4442
4443 /* KIQ functions */
4444 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4445 {
4446         uint32_t tmp;
4447         struct amdgpu_device *adev = ring->adev;
4448
4449         /* tell RLC which is KIQ queue */
4450         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4451         tmp &= 0xffffff00;
4452         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4453         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4454         tmp |= 0x80;
4455         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4456 }
4457
/* Use the KIQ to bring all enabled compute queues (KCQs) online: emit a
 * SET_RESOURCES packet with the queue mask, a MAP_QUEUES packet per
 * compute ring, then busy-wait on a scratch register that the KIQ
 * writes once the packets have executed.
 *
 * Returns 0 on success, a negative error code on ring/scratch failure
 * or -EINVAL on timeout. */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
        uint32_t scratch, tmp = 0;
        uint64_t queue_mask = 0;
        int r, i;

        /* build the bitmask of MEC queues to hand to the CP scheduler */
        for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
                if (!test_bit(i, adev->gfx.mec.queue_bitmap))
                        continue;

                /* This situation may be hit in the future if a new HW
                 * generation exposes more than 64 queues. If so, the
                 * definition of queue_mask needs updating */
                if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
                        DRM_ERROR("Invalid KCQ enabled: %d\n", i);
                        break;
                }

                queue_mask |= (1ull << i);
        }

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("Failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);

        /* 8 dwords per MAP_QUEUES + 8 for SET_RESOURCES + 3 for the
         * completion write */
        r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
        if (r) {
                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }
        /* set resources */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
        amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
        amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
                uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
                uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

                /* map queues */
                amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
                /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
                amdgpu_ring_write(kiq_ring,
                                  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
                amdgpu_ring_write(kiq_ring,
                                  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
                                  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                                  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
                amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
                amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
        }
        /* write to scratch for completion */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
        amdgpu_ring_commit(kiq_ring);

        /* poll until the KIQ has executed the stream (scratch flips to
         * the magic value) or we hit the usec timeout */
        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i >= adev->usec_timeout) {
                DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
                          scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);

        return r;
}
4543
/* Deactivate the currently srbm-selected HQD.
 * @req: value for CP_HQD_DEQUEUE_REQUEST.DEQUEUE_REQ (type of dequeue).
 * If the queue is active, a dequeue request is issued and we wait for
 * CP_HQD_ACTIVE to clear; the request and PQ pointers are then reset
 * unconditionally.  Caller must hold the srbm selection.
 *
 * Returns 0 on success or -ETIMEDOUT if the queue never went idle. */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
        int i, r = 0;

        if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
                WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
                for (i = 0; i < adev->usec_timeout; i++) {
                        if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
                                break;
                        udelay(1);
                }
                if (i == adev->usec_timeout)
                        r = -ETIMEDOUT;
        }
        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
        WREG32(mmCP_HQD_PQ_RPTR, 0);
        WREG32(mmCP_HQD_PQ_WPTR, 0);

        return r;
}
4564
/* Fill in the ring's MQD (memory queue descriptor) image.  Nothing is
 * written to HQD registers here except reads of current defaults; the
 * values are committed later by gfx_v8_0_mqd_commit().  Caller must
 * hold the srbm selection for this queue.  Always returns 0. */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
        uint32_t tmp;

        mqd->header = 0xC0310800;
        mqd->compute_pipelinestat_enable = 0x00000001;
        mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
        mqd->compute_misc_reserved = 0x00000003;
        if (!(adev->flags & AMD_IS_APU)) {
                /* point the CP at the dynamic CU mask stored in the MQD
                 * allocation ("dyamic" spelling matches vi_structs.h) */
                mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
                                             + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
                mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
                                             + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
        }
        /* EOP buffer base is a 256-byte aligned GPU address */
        eop_base_addr = ring->eop_gpu_addr >> 8;
        mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
        mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
        tmp = RREG32(mmCP_HQD_EOP_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
                        (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

        mqd->cp_hqd_eop_control = tmp;

        /* enable doorbell? */
        tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
                            CP_HQD_PQ_DOORBELL_CONTROL,
                            DOORBELL_EN,
                            ring->use_doorbell ? 1 : 0);

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* set the pointer to the MQD */
        mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
        mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

        /* set MQD vmid to 0 */
        tmp = RREG32(mmCP_MQD_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
        mqd->cp_mqd_control = tmp;

        /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
        hqd_gpu_addr = ring->gpu_addr >> 8;
        mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
        mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

        /* set up the HQD, this is similar to CP_RB0_CNTL */
        tmp = RREG32(mmCP_HQD_PQ_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
                            (order_base_2(ring->ring_size / 4) - 1));
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                        ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
        mqd->cp_hqd_pq_control = tmp;

        /* set the wb address whether it's enabled or not */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_rptr_report_addr_hi =
                upper_32_bits(wb_gpu_addr) & 0xffff;

        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

        tmp = 0;
        /* enable the doorbell if requested */
        if (ring->use_doorbell) {
                tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                DOORBELL_OFFSET, ring->doorbell_index);

                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_EN, 1);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_SOURCE, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_HIT, 0);
        }

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
        ring->wptr = 0;
        mqd->cp_hqd_pq_wptr = ring->wptr;
        mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

        /* set the vmid for the queue */
        mqd->cp_hqd_vmid = 0;

        tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
        mqd->cp_hqd_persistent_state = tmp;

        /* set MTYPE */
        tmp = RREG32(mmCP_HQD_IB_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ib_control = tmp;

        tmp = RREG32(mmCP_HQD_IQ_TIMER);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
        mqd->cp_hqd_iq_timer = tmp;

        tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ctx_save_control = tmp;

        /* defaults: snapshot the remaining registers as-is */
        mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
        mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
        mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
        mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
        mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
        mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
        mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
        mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
        mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
        mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
        mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
        mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
        mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
        mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
        mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

        /* activate the queue */
        mqd->cp_hqd_active = 1;

        return 0;
}
4709
/* Program a prepared MQD image into the currently srbm-selected HQD.
 * Registers are written in three contiguous ranges so that
 * CP_MQD_BASE_ADDR..CP_HQD_ACTIVE (which activates the queue) comes
 * last.  Caller must hold the srbm selection.  Always returns 0. */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
                        struct vi_mqd *mqd)
{
        uint32_t mqd_reg;
        uint32_t *mqd_data;

        /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
        mqd_data = &mqd->cp_mqd_base_addr_lo;

        /* disable wptr polling */
        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

        /* program all HQD registers */
        for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
         * This is safe since EOP RPTR==WPTR for any inactive HQD
         * on ASICs that do not support context-save.
         * EOP writes/reads can start anywhere in the ring.
         */
        if (adev->asic_type != CHIP_TONGA) {
                WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
                WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
                WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
        }

        for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* activate the HQD */
        for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        return 0;
}
4746
/* Initialize (or re-initialize after GPU reset) the KIQ's MQD and HQD.
 * On reset the saved MQD backup is restored and re-committed; on first
 * init a fresh MQD is built and a backup copy is kept.  The backup slot
 * for the KIQ is index AMDGPU_MAX_COMPUTE_RINGS, past the KCQ slots.
 * Always returns 0. */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

        gfx_v8_0_kiq_setting(ring);

        if (adev->gfx.in_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);
        } else {
                /* first init: build the MQD under srbm selection
                 * ("dyamic" spelling matches the vi_structs.h fields) */
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        }

        return 0;
}
4785
/* Initialize a compute (KCQ) ring's MQD, or restore it on reset/resume.
 * Unlike the KIQ path there is no gfx_v8_0_mqd_commit() here; the
 * queues are presumably mapped later via the KIQ (see
 * gfx_v8_0_kiq_kcq_enable() in gfx_v8_0_kiq_resume()) - confirm.
 * Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot index == position in the compute_ring array */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
		/* first-time init: build the MQD from scratch */
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		/* NOTE: "dyamic" is the field's actual (misspelled) name in vi_structs.h */
		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a backup so a GPU reset can restore without re-init */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		/* resume path: MQD is intact, just clear the ring contents */
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4816
4817 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4818 {
4819         if (adev->asic_type > CHIP_TONGA) {
4820                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4821                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4822         }
4823         /* enable doorbells */
4824         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4825 }
4826
/* Bring up the KIQ and all compute rings: enable the MEC, init each
 * ring's MQD (mapping its BO only for the duration of the init), program
 * the doorbell range, enable the KCQs via the KIQ, then ring-test
 * everything.  Returns 0 on success or the first fatal error.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	/* map the KIQ MQD BO only while initializing the queue */
	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	/* same reserve/kmap/init/unmap sequence for every compute queue */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ - a KIQ failure is fatal for the whole bring-up */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs - a failing KCQ only marks that ring not ready */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
4894
/* Bring up the command processors: load the CP microcode (legacy path)
 * or verify SMU-driven firmware loading, then resume the GFX ring and
 * the KIQ/KCQs.  Returns 0 on success, a negative error code otherwise.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	/* keep GUI idle interrupts off during bring-up on dGPUs */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the firmware; only verify completion here */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			if (adev->asic_type == CHIP_TOPAZ) {
				/* Topaz: MEC still uses the legacy load path */
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
										 AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
4953
/* Enable or disable both the gfx and compute command processors together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4959
/* ip_funcs .hw_init callback: program golden registers, init the base
 * GPU state, then bring up the RLC followed by the command processors.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the RLC must be running before the CP rings come up */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4976
/* ip_funcs .hw_fini callback: release the privilege-violation interrupts
 * and stop the CP and RLC.  SRIOV VFs skip the hardware teardown.
 * Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	/* switch GFX powergating to the UNGATE state on teardown */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
4995
/* ip_funcs .suspend callback: set in_suspend before hw_fini so the
 * queue-init paths can tell resume apart from first-time init (see
 * gfx_v8_0_kcq_init_queue()).
 */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	adev->gfx.in_suspend = true;
	return gfx_v8_0_hw_fini(adev);
}
5002
/* ip_funcs .resume callback.  in_suspend must stay set across
 * gfx_v8_0_hw_init() so gfx_v8_0_kcq_init_queue() takes its resume path;
 * only clear it afterwards.
 */
static int gfx_v8_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gfx_v8_0_hw_init(adev);
	adev->gfx.in_suspend = false;
	return r;
}
5012
5013 static bool gfx_v8_0_is_idle(void *handle)
5014 {
5015         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5016
5017         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5018                 return false;
5019         else
5020                 return true;
5021 }
5022
5023 static int gfx_v8_0_wait_for_idle(void *handle)
5024 {
5025         unsigned i;
5026         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5027
5028         for (i = 0; i < adev->usec_timeout; i++) {
5029                 if (gfx_v8_0_is_idle(handle))
5030                         return 0;
5031
5032                 udelay(1);
5033         }
5034         return -ETIMEDOUT;
5035 }
5036
/* ip_funcs .check_soft_reset callback: inspect the GRBM/SRBM status
 * registers, compute which soft-reset bits would be needed to recover,
 * cache the masks in adev->gfx.{grbm,srbm}_soft_reset for the
 * pre/soft/post reset stages, and return true if any reset is pending.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy GFX sub-block requests a CP+GFX reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC busy and CP front-end/compute/gfx busy bits */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS: pending GRBM requests or a busy semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5098
5099 static int gfx_v8_0_pre_soft_reset(void *handle)
5100 {
5101         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5102         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5103
5104         if ((!adev->gfx.grbm_soft_reset) &&
5105             (!adev->gfx.srbm_soft_reset))
5106                 return 0;
5107
5108         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5109         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5110
5111         /* stop the rlc */
5112         gfx_v8_0_rlc_stop(adev);
5113
5114         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5115             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5116                 /* Disable GFX parsing/prefetching */
5117                 gfx_v8_0_cp_gfx_enable(adev, false);
5118
5119         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5120             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5121             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5122             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5123                 int i;
5124
5125                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5126                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5127
5128                         mutex_lock(&adev->srbm_mutex);
5129                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5130                         gfx_v8_0_deactivate_hqd(adev, 2);
5131                         vi_srbm_select(adev, 0, 0, 0, 0);
5132                         mutex_unlock(&adev->srbm_mutex);
5133                 }
5134                 /* Disable MEC parsing/prefetching */
5135                 gfx_v8_0_cp_compute_enable(adev, false);
5136         }
5137
5138        return 0;
5139 }
5140
/* ip_funcs .soft_reset callback: apply the GRBM/SRBM soft resets cached
 * by gfx_v8_0_check_soft_reset().  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall and clear the GFX pipe in GMCON before asserting resets */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert the GRBM reset bits, wait, then deassert; each
		 * write is followed by a read back of the register
		 * (presumably to post the write - confirm)
		 */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/wait/deassert sequence for the SRBM bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall/clear set up above */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5202
5203 static int gfx_v8_0_post_soft_reset(void *handle)
5204 {
5205         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5206         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5207
5208         if ((!adev->gfx.grbm_soft_reset) &&
5209             (!adev->gfx.srbm_soft_reset))
5210                 return 0;
5211
5212         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5213         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5214
5215         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5216             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5217                 gfx_v8_0_cp_gfx_resume(adev);
5218
5219         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5220             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5221             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5222             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5223                 int i;
5224
5225                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5226                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5227
5228                         mutex_lock(&adev->srbm_mutex);
5229                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5230                         gfx_v8_0_deactivate_hqd(adev, 2);
5231                         vi_srbm_select(adev, 0, 0, 0, 0);
5232                         mutex_unlock(&adev->srbm_mutex);
5233                 }
5234                 gfx_v8_0_kiq_resume(adev);
5235         }
5236         gfx_v8_0_rlc_start(adev);
5237
5238         return 0;
5239 }
5240
5241 /**
5242  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5243  *
5244  * @adev: amdgpu_device pointer
5245  *
5246  * Fetches a GPU clock counter snapshot.
5247  * Returns the 64 bit clock counter snapshot.
5248  */
5249 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5250 {
5251         uint64_t clock;
5252
5253         mutex_lock(&adev->gfx.gpu_clock_mutex);
5254         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5255         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5256                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5257         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5258         return clock;
5259 }
5260
/* Emit WRITE_DATA packets that program the per-VMID GDS base/size, GWS
 * and OA allocation registers for @vmid.  The byte/bit quantities are
 * converted to the units the registers expect via the AMDGPU_*_SHIFT
 * constants before being written.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base packed into one register value */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5308
5309 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5310 {
5311         WREG32(mmSQ_IND_INDEX,
5312                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5313                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5314                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5315                 (SQ_IND_INDEX__FORCE_READ_MASK));
5316         return RREG32(mmSQ_IND_DATA);
5317 }
5318
5319 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5320                            uint32_t wave, uint32_t thread,
5321                            uint32_t regno, uint32_t num, uint32_t *out)
5322 {
5323         WREG32(mmSQ_IND_INDEX,
5324                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5325                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5326                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5327                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5328                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5329                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5330         while (num--)
5331                 *(out++) = RREG32(mmSQ_IND_DATA);
5332 }
5333
5334 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5335 {
5336         /* type 0 wave data */
5337         dst[(*no_fields)++] = 0;
5338         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5339         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5340         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5341         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5342         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5343         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5344         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5345         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5346         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5347         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5348         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5349         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5350         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5351         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5352         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5353         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5354         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5355         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5356 }
5357
5358 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5359                                      uint32_t wave, uint32_t start,
5360                                      uint32_t size, uint32_t *dst)
5361 {
5362         wave_read_regs(
5363                 adev, simd, wave, 0,
5364                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5365 }
5366
5367
/* GFX helper vtable installed into adev->gfx.funcs in gfx_v8_0_early_init() */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5374
/* ip_funcs .early_init callback: set ring counts and install the
 * function tables used by the rest of the driver.  Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5389
/* ip_funcs .late_init callback: enable the privileged register/
 * instruction fault interrupts, run the EDC GPR workarounds (which need
 * the IB pool, hence late init), then gate GFX power.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5413
/* Toggle static per-CU power gating in RLC_PG_CNTL; on Polaris11/12 the
 * SMU is additionally notified through powerplay first.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5427
/* Toggle dynamic per-CU power gating in RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5433
/* Toggle quick power gating in RLC_PG_CNTL (Polaris11-family feature). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5439
/* Toggle GFX coarse-grain power gating in RLC_PG_CNTL (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5445
/* Toggle GFX pipeline power gating in RLC_PG_CNTL (Carrizo/Stoney). */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5455
5456 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5457                                           bool enable)
5458 {
5459         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5460                 cz_enable_gfx_cg_power_gating(adev, true);
5461                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5462                         cz_enable_gfx_pipeline_power_gating(adev, true);
5463         } else {
5464                 cz_enable_gfx_cg_power_gating(adev, false);
5465                 cz_enable_gfx_pipeline_power_gating(adev, false);
5466         }
5467 }
5468
/* ip_funcs .set_powergating_state callback: apply GFX powergating
 * features per ASIC; each feature is enabled only when gating is
 * requested AND the corresponding pg_flags bit is set, otherwise it is
 * explicitly disabled.  No-op for SRIOV VFs and unlisted ASICs.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down on power up/down */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}

	return 0;
}
5529
/*
 * gfx_v8_0_get_clockgating_state - report the live GFX clockgating state
 * @handle: amdgpu_device pointer (as void *)
 * @flags: out-mask of AMD_CG_SUPPORT_GFX_* features currently active
 *
 * Derives the state from hardware register reads rather than cached
 * driver flags.  Bits are OR-ed into @flags; the caller is expected to
 * have initialized it.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int data;

        /* NOTE(review): under SR-IOV flags are cleared but the register
         * reads below still execute — presumably harmless; confirm. */
        if (amdgpu_sriov_vf(adev))
                *flags = 0;

        /* AMD_CG_SUPPORT_GFX_MGCG - gated unless the CPF override is set */
        data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
        if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_MGCG;

        /* AMD_CG_SUPPORT_GFX_CGCG */
        data = RREG32(mmRLC_CGCG_CGLS_CTRL);
        if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CGCG;

        /* AMD_CG_SUPPORT_GFX_CGLS - same register as CGCG */
        if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CGLS;

        /* AMD_CG_SUPPORT_GFX_CGTS - active when the override is clear */
        data = RREG32(mmCGTS_SM_CTRL_REG);
        if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_CGTS;

        /* AMD_CG_SUPPORT_GFX_CGTS_LS */
        if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

        /* AMD_CG_SUPPORT_GFX_RLC_LS - RLC light sleep implies MGLS */
        data = RREG32(mmRLC_MEM_SLP_CNTL);
        if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

        /* AMD_CG_SUPPORT_GFX_CP_LS - CP light sleep implies MGLS */
        data = RREG32(mmCP_MEM_SLP_CNTL);
        if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5571
/*
 * gfx_v8_0_send_serdes_cmd - issue a BPM command over the RLC serdes bus
 * @adev: amdgpu device
 * @reg_addr: BPM register address to target (e.g. BPM_REG_MGCG_OVERRIDE)
 * @cmd: serdes command (SET_BPM_SERDES_CMD / CLE_BPM_SERDES_CMD)
 *
 * Selects all SEs/SHs and broadcasts to every CU and non-CU master, then
 * programs RLC_SERDES_WR_CTRL with the command and target address.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
                                     uint32_t reg_addr, uint32_t cmd)
{
        uint32_t data;

        /* broadcast: all shader engines / shader arrays / instances */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

        data = RREG32(mmRLC_SERDES_WR_CTRL);
        if (adev->asic_type == CHIP_STONEY)
                /* Stoney keeps BPM_DATA/REG_ADDR untouched here; they are
                 * fully overwritten by the OR below anyway */
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        else
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
                          RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        /* encode command, target register and broadcast BPM address */
        data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
                 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
                 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
                 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

        WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5612
/* RLC safe-mode handshake messages and RLC_GPR_REG2 field layout used by
 * the Iceland-style enter/exit safe-mode helpers below. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5619
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode before touching CG regs
 * @adev: amdgpu device
 *
 * No-op when the RLC F32 core is not running or when neither CGCG nor MGCG
 * is enabled.  Otherwise writes the safe-mode request (CMD=1, MESSAGE=1) to
 * RLC_SAFE_MODE, polls RLC_GPM_STAT until GFX clocks and power both report
 * on, waits for the RLC to ack (CMD bit clears), then records in_safe_mode.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
        u32 data;
        unsigned i;

        data = RREG32(mmRLC_CNTL);
        if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
                return;

        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
                /* CMD=1, MESSAGE=1 -> enter safe mode */
                data |= RLC_SAFE_MODE__CMD_MASK;
                data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
                data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
                WREG32(mmRLC_SAFE_MODE, data);

                /* wait for GFX clock/power status to be reported on */
                for (i = 0; i < adev->usec_timeout; i++) {
                        if ((RREG32(mmRLC_GPM_STAT) &
                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
                            (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
                             RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
                                break;
                        udelay(1);
                }

                /* wait for the RLC to consume the command */
                for (i = 0; i < adev->usec_timeout; i++) {
                        if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
                                break;
                        udelay(1);
                }
                adev->gfx.rlc.in_safe_mode = true;
        }
}
5653
/*
 * iceland_exit_rlc_safe_mode - release the RLC from safe mode
 * @adev: amdgpu device
 *
 * Counterpart of iceland_enter_rlc_safe_mode().  No-op when the RLC F32
 * core is not running.  Writes CMD=1 with MESSAGE=0 (exit) only if we
 * actually entered safe mode, then waits for the CMD bit to clear.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
        u32 data = 0;
        unsigned i;

        data = RREG32(mmRLC_CNTL);
        if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
                return;

        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
                if (adev->gfx.rlc.in_safe_mode) {
                        /* CMD=1, MESSAGE=0 -> exit safe mode */
                        data |= RLC_SAFE_MODE__CMD_MASK;
                        data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
                        WREG32(mmRLC_SAFE_MODE, data);
                        adev->gfx.rlc.in_safe_mode = false;
                }
        }

        /* wait for the RLC to consume the command (if one was issued) */
        for (i = 0; i < adev->usec_timeout; i++) {
                if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
                        break;
                udelay(1);
        }
}
5678
/* RLC safe-mode entry/exit callbacks used by the VI-family gfx code. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
        .enter_safe_mode = iceland_enter_rlc_safe_mode,
        .exit_safe_mode = iceland_exit_rlc_safe_mode
};
5683
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 * @adev: amdgpu device
 * @enable: true to enable medium-grain clock gating, false to disable
 *
 * Runs the numbered enable/disable programming sequence under RLC safe
 * mode.  The step order (override writes, serdes waits, serdes commands)
 * mirrors the hardware programming sequence and must not be reordered.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, data;

        adev->gfx.rlc.funcs->enter_safe_mode(adev);

        /* It is disabled by HW by default */
        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
                                /* 1 - RLC memory Light sleep */
                                WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

                        /* 2 - CP memory Light sleep */
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
                                WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
                }

                /* 3 - RLC_CGTT_MGCG_OVERRIDE */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                if (adev->flags & AMD_IS_APU)
                        /* APUs keep the GRBM override in place */
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
                else
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 5 - clear mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
                        /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
                        temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                        data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
                        data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
                        data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
                        data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
                        if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
                            (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
                        data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
                        data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
                        if (temp != data)
                                WREG32(mmCGTS_SM_CTRL_REG, data);
                }
                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        } else {
                /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 2 - disable MGLS in RLC */
                data = RREG32(mmRLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                        WREG32(mmRLC_MEM_SLP_CNTL, data);
                }

                /* 3 - disable MGLS in CP */
                data = RREG32(mmCP_MEM_SLP_CNTL);
                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                        WREG32(mmCP_MEM_SLP_CNTL, data);
                }

                /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
                temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
                                CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
                if (temp != data)
                        WREG32(mmCGTS_SM_CTRL_REG, data);

                /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 6 - set mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                udelay(50);

                /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        }

        adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5787
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device
 * @enable: true to enable coarse-grain clock gating, false to disable
 *
 * Runs the CGCG/CGLS enable/disable sequence under RLC safe mode.  The
 * ordering of override writes, serdes waits and serdes commands follows
 * the hardware programming sequence and must be preserved.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, temp1, data, data1;

        temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

        adev->gfx.rlc.funcs->enter_safe_mode(adev);

        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
                /* 1 - clear the CGCG override */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 2 - clear cgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 3 - write cmd to set CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

                /* 4 - enable cgcg */
                data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
                        /* enable cgls*/
                        data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

                        temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                        data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

                        if (temp1 != data1)
                                WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
                } else {
                        data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
                }

                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);

                /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
                 * Cmp_busy/GFX_Idle interrupts
                 */
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
        } else {
                /* disable cntx_empty_int_enable & GFX Idle interrupt */
                gfx_v8_0_enable_gui_idle_interrupt(adev, false);

                /* TEST CGCG */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* read gfx register to wake up cgcg */
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to Set CGCG Override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to Clear CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

                /* disable cgcg, cgls should be disabled too. */
                data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
                          RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
                /* enable interrupts again for PG */
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
        }

        gfx_v8_0_wait_for_rlc_serdes(adev);

        adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5880 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5881                                             bool enable)
5882 {
5883         if (enable) {
5884                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5885                  * ===  MGCG + MGLS + TS(CG/LS) ===
5886                  */
5887                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5888                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5889         } else {
5890                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5891                  * ===  CGCG + CGLS ===
5892                  */
5893                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5894                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5895         }
5896         return 0;
5897 }
5898
5899 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5900                                           enum amd_clockgating_state state)
5901 {
5902         uint32_t msg_id, pp_state = 0;
5903         uint32_t pp_support_state = 0;
5904         void *pp_handle = adev->powerplay.pp_handle;
5905
5906         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5907                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5908                         pp_support_state = PP_STATE_SUPPORT_LS;
5909                         pp_state = PP_STATE_LS;
5910                 }
5911                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5912                         pp_support_state |= PP_STATE_SUPPORT_CG;
5913                         pp_state |= PP_STATE_CG;
5914                 }
5915                 if (state == AMD_CG_STATE_UNGATE)
5916                         pp_state = 0;
5917
5918                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5919                                 PP_BLOCK_GFX_CG,
5920                                 pp_support_state,
5921                                 pp_state);
5922                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5923         }
5924
5925         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5926                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5927                         pp_support_state = PP_STATE_SUPPORT_LS;
5928                         pp_state = PP_STATE_LS;
5929                 }
5930
5931                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5932                         pp_support_state |= PP_STATE_SUPPORT_CG;
5933                         pp_state |= PP_STATE_CG;
5934                 }
5935
5936                 if (state == AMD_CG_STATE_UNGATE)
5937                         pp_state = 0;
5938
5939                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5940                                 PP_BLOCK_GFX_MG,
5941                                 pp_support_state,
5942                                 pp_state);
5943                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5944         }
5945
5946         return 0;
5947 }
5948
5949 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5950                                           enum amd_clockgating_state state)
5951 {
5952
5953         uint32_t msg_id, pp_state = 0;
5954         uint32_t pp_support_state = 0;
5955         void *pp_handle = adev->powerplay.pp_handle;
5956
5957         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5958                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5959                         pp_support_state = PP_STATE_SUPPORT_LS;
5960                         pp_state = PP_STATE_LS;
5961                 }
5962                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5963                         pp_support_state |= PP_STATE_SUPPORT_CG;
5964                         pp_state |= PP_STATE_CG;
5965                 }
5966                 if (state == AMD_CG_STATE_UNGATE)
5967                         pp_state = 0;
5968
5969                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5970                                 PP_BLOCK_GFX_CG,
5971                                 pp_support_state,
5972                                 pp_state);
5973                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5974         }
5975
5976         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5977                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5978                         pp_support_state = PP_STATE_SUPPORT_LS;
5979                         pp_state = PP_STATE_LS;
5980                 }
5981                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5982                         pp_support_state |= PP_STATE_SUPPORT_CG;
5983                         pp_state |= PP_STATE_CG;
5984                 }
5985                 if (state == AMD_CG_STATE_UNGATE)
5986                         pp_state = 0;
5987
5988                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5989                                 PP_BLOCK_GFX_3D,
5990                                 pp_support_state,
5991                                 pp_state);
5992                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5993         }
5994
5995         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5996                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5997                         pp_support_state = PP_STATE_SUPPORT_LS;
5998                         pp_state = PP_STATE_LS;
5999                 }
6000
6001                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6002                         pp_support_state |= PP_STATE_SUPPORT_CG;
6003                         pp_state |= PP_STATE_CG;
6004                 }
6005
6006                 if (state == AMD_CG_STATE_UNGATE)
6007                         pp_state = 0;
6008
6009                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6010                                 PP_BLOCK_GFX_MG,
6011                                 pp_support_state,
6012                                 pp_state);
6013                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6014         }
6015
6016         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6017                 pp_support_state = PP_STATE_SUPPORT_LS;
6018
6019                 if (state == AMD_CG_STATE_UNGATE)
6020                         pp_state = 0;
6021                 else
6022                         pp_state = PP_STATE_LS;
6023
6024                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6025                                 PP_BLOCK_GFX_RLC,
6026                                 pp_support_state,
6027                                 pp_state);
6028                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6029         }
6030
6031         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6032                 pp_support_state = PP_STATE_SUPPORT_LS;
6033
6034                 if (state == AMD_CG_STATE_UNGATE)
6035                         pp_state = 0;
6036                 else
6037                         pp_state = PP_STATE_LS;
6038                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6039                         PP_BLOCK_GFX_CP,
6040                         pp_support_state,
6041                         pp_state);
6042                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6043         }
6044
6045         return 0;
6046 }
6047
6048 static int gfx_v8_0_set_clockgating_state(void *handle,
6049                                           enum amd_clockgating_state state)
6050 {
6051         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6052
6053         if (amdgpu_sriov_vf(adev))
6054                 return 0;
6055
6056         switch (adev->asic_type) {
6057         case CHIP_FIJI:
6058         case CHIP_CARRIZO:
6059         case CHIP_STONEY:
6060                 gfx_v8_0_update_gfx_clock_gating(adev,
6061                                                  state == AMD_CG_STATE_GATE);
6062                 break;
6063         case CHIP_TONGA:
6064                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6065                 break;
6066         case CHIP_POLARIS10:
6067         case CHIP_POLARIS11:
6068         case CHIP_POLARIS12:
6069                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6070                 break;
6071         default:
6072                 break;
6073         }
6074         return 0;
6075 }
6076
6077 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6078 {
6079         return ring->adev->wb.wb[ring->rptr_offs];
6080 }
6081
6082 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6083 {
6084         struct amdgpu_device *adev = ring->adev;
6085
6086         if (ring->use_doorbell)
6087                 /* XXX check if swapping is necessary on BE */
6088                 return ring->adev->wb.wb[ring->wptr_offs];
6089         else
6090                 return RREG32(mmCP_RB0_WPTR);
6091 }
6092
6093 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6094 {
6095         struct amdgpu_device *adev = ring->adev;
6096
6097         if (ring->use_doorbell) {
6098                 /* XXX check if swapping is necessary on BE */
6099                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6100                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6101         } else {
6102                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6103                 (void)RREG32(mmCP_RB0_WPTR);
6104         }
6105 }
6106
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 * @ring: amdgpu ring
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the engine's done bit matches.  Compute/KIQ
 * rings use the per-ME, per-pipe CP bit; the GFX ring uses CP0 and waits
 * on the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
        u32 ref_and_mask, reg_mem_engine;

        if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
            (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
                switch (ring->me) {
                case 1:
                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
                        break;
                case 2:
                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
                        break;
                default:
                        /* unexpected ME: no flush bit to wait on, skip */
                        return;
                }
                reg_mem_engine = 0;
        } else {
                ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
                reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
                                 reg_mem_engine));
        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
        amdgpu_ring_write(ring, ref_and_mask);
        amdgpu_ring_write(ring, ref_and_mask);
        amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6139
/*
 * gfx_v8_0_ring_emit_vgt_flush - flush the VGT
 * @ring: amdgpu ring
 *
 * Emits a VS partial flush event followed by a VGT flush event.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
                EVENT_INDEX(4));

        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
                EVENT_INDEX(0));
}
6150
6151
/*
 * gfx_v8_0_ring_emit_hdp_invalidate - invalidate the HDP cache
 * @ring: amdgpu ring
 *
 * Emits a WRITE_DATA packet that writes 1 to HDP_DEBUG0, which triggers
 * an HDP cache invalidation.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0) |
                                 WR_CONFIRM));
        amdgpu_ring_write(ring, mmHDP_DEBUG0);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 1);

}
6163
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the GFX ring
 * @ring: amdgpu ring
 * @ib: indirect buffer to schedule
 * @vm_id: VMID the IB executes under (packed into bits 31:24 of control)
 * @ctx_switch: unused here
 *
 * CE IBs use INDIRECT_BUFFER_CONST; DE IBs use INDIRECT_BUFFER.  Under
 * SR-IOV, preemptible DE IBs are marked PRE_ENB and preceded by DE
 * metadata for mid-command-buffer preemption.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
                                      struct amdgpu_ib *ib,
                                      unsigned vm_id, bool ctx_switch)
{
        u32 header, control = 0;

        if (ib->flags & AMDGPU_IB_FLAG_CE)
                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        else
                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

        control |= ib->length_dw | (vm_id << 24);

        if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
                control |= INDIRECT_BUFFER_PRE_ENB(1);

                if (!(ib->flags & AMDGPU_IB_FLAG_CE))
                        gfx_v8_0_ring_emit_de_meta(ring);
        }

        amdgpu_ring_write(ring, header);
        amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) |
#endif
                          (ib->gpu_addr & 0xFFFFFFFC)); /* dword-aligned address */
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
}
6193
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring
 * @ring: amdgpu ring
 * @ib: indirect buffer to schedule
 * @vm_id: VMID the IB executes under (packed into bits 31:24 of control)
 * @ctx_switch: unused here
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
                                          struct amdgpu_ib *ib,
                                          unsigned vm_id, bool ctx_switch)
{
        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
                                (2 << 0) |
#endif
                                (ib->gpu_addr & 0xFFFFFFFC)); /* dword-aligned */
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
}
6209
/* Emit a fence on the GFX ring via EVENT_WRITE_EOP.
 *
 * The packet flushes the TCL1/TC caches and writes @seq to @addr at
 * end-of-pipe; the data select picks a 64- vs 32-bit write based on
 * AMDGPU_FENCE_FLAG_64BIT, and an interrupt is requested when
 * AMDGPU_FENCE_FLAG_INT is set.  The upper seq DW is emitted
 * unconditionally to keep the packet size fixed.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* EVENT_WRITE_EOP - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, addr & 0xfffffffc); /* fence address must be DW-aligned */
        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
                          DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));

}
6230
/* Emit a WAIT_REG_MEM that polls the ring's fence writeback memory
 * until it equals the latest emitted sync sequence number, i.e. wait
 * for previously scheduled work on this ring to signal.  GFX rings
 * wait on the PFP engine, compute rings on ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;

        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
                                 WAIT_REG_MEM_FUNCTION(3) | /* equal */
                                 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
        amdgpu_ring_write(ring, seq); /* reference value */
        amdgpu_ring_write(ring, 0xffffffff); /* compare mask: all bits */
        amdgpu_ring_write(ring, 4); /* poll interval */
}
6247
/* Flush the GPU TLB for @vm_id after switching its page directory.
 *
 * Sequence: write the new page directory base (contexts 0-7 and 8-15
 * live in two separate register banks), request an invalidate for this
 * VMID, wait for the invalidate request register to clear, and on GFX
 * rings finally sync PFP to ME so the prefetcher does not run ahead
 * with stale translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                        unsigned vm_id, uint64_t pd_addr)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
                                 WRITE_DATA_DST_SEL(0)) |
                                 WR_CONFIRM);
        if (vm_id < 8) {
                amdgpu_ring_write(ring,
                                  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
        } else {
                amdgpu_ring_write(ring,
                                  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
        }
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, pd_addr >> 12); /* page directory base, 4K units */

        /* bits 0-15 are the VM contexts0-15 */
        /* invalidate the cache */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 1 << vm_id);

        /* wait for the invalidate to complete */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
                                 WAIT_REG_MEM_ENGINE(0))); /* me */
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0); /* ref */
        amdgpu_ring_write(ring, 0); /* mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */

        /* compute doesn't have PFP */
        if (usepfp) {
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
        }
}
6294
6295 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6296 {
6297         return ring->adev->wb.wb[ring->wptr_offs];
6298 }
6299
6300 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6301 {
6302         struct amdgpu_device *adev = ring->adev;
6303
6304         /* XXX check if swapping is necessary on BE */
6305         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6306         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6307 }
6308
/* Emit a fence on a compute ring via RELEASE_MEM.
 *
 * Like the GFX EOP fence: flush TCL1/TC caches, then write @seq to
 * @addr (64- or 32-bit per AMDGPU_FENCE_FLAG_64BIT) and optionally
 * raise an interrupt (AMDGPU_FENCE_FLAG_INT).  Note the DW order
 * differs from EVENT_WRITE_EOP: the data/int select comes before the
 * address here.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                             u64 addr, u64 seq,
                                             unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, addr & 0xfffffffc); /* fence address must be DW-aligned */
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));
}
6329
/* Emit a fence on the KIQ ring.
 *
 * KIQ fences are plain WRITE_DATA packets: write the low 32 bits of
 * @seq to @addr (only 32 bits are allocated per seq writeback slot,
 * hence the BUG_ON for 64-bit requests), then, if an interrupt was
 * requested, poke CPC_INT_STATUS to trigger the CPC generic interrupt.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned int flags)
{
        /* we only allocate 32bit for each seq wb address */
        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        /* write fence seq to the "addr" */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); /* dst: memory */
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));

        if (flags & AMDGPU_FENCE_FLAG_INT) {
                /* set register to trigger INT */
                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
                amdgpu_ring_write(ring, 0);
                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
        }
}
6354
/* Emit a SWITCH_BUFFER packet (payload 0) on the ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
        amdgpu_ring_write(ring, 0);
}
6360
/* Emit a CONTEXT_CONTROL packet selecting which state blocks the CP
 * loads for the next submission.  Under SR-IOV, CE metadata is emitted
 * first; on a real context switch a VGT flush is emitted and the full
 * set of load bits is enabled.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
        uint32_t dw2 = 0;

        if (amdgpu_sriov_vf(ring->adev))
                gfx_v8_0_ring_emit_ce_meta(ring);

        dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
                gfx_v8_0_ring_emit_vgt_flush(ring);
                /* set load_global_config & load_global_uconfig */
                dw2 |= 0x8001;
                /* set load_cs_sh_regs */
                dw2 |= 0x01000000;
                /* set load_per_context_state & load_gfx_sh_regs for GFX */
                dw2 |= 0x10002;

                /* set load_ce_ram if preamble presented */
                if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
                        dw2 |= 0x10000000;
        } else {
                /* still load_ce_ram if this is the first time preamble presented
                 * although there is no context switch happens.
                 */
                if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
                        dw2 |= 0x10000000;
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, dw2);
        amdgpu_ring_write(ring, 0);
}
6393
/* Emit a COND_EXEC packet whose DW-count field is a placeholder
 * (0x55aa55aa), and return the ring offset of that placeholder so
 * gfx_v8_0_ring_emit_patch_cond_exec() can fill in the real count
 * once the conditional region has been emitted.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
        unsigned ret;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
        ret = ring->wptr & ring->buf_mask; /* offset of the count DW */
        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
        return ret;
}
6406
/* Patch the COND_EXEC placeholder at @offset (as returned by
 * gfx_v8_0_ring_emit_init_cond_exec()) with the number of DWs emitted
 * since, accounting for ring-buffer wrap-around when the current
 * write pointer sits before the placeholder.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
        unsigned cur;

        BUG_ON(offset > ring->buf_mask);
        BUG_ON(ring->ring[offset] != 0x55aa55aa); /* must still hold the dummy */

        cur = (ring->wptr & ring->buf_mask) - 1;
        if (likely(cur > offset))
                ring->ring[offset] = cur - offset;
        else
                /* wptr wrapped past the end of the ring buffer */
                ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6420
/* Emit a COPY_DATA packet that copies register @reg into the
 * writeback slot reserved for virtualized register reads
 * (adev->virt.reg_val_offs), where the caller can pick it up.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
        struct amdgpu_device *adev = ring->adev;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
                                (5 << 8) |      /* dst: memory */
                                (1 << 20));     /* write confirm */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
}
6436
/* Emit a WRITE_DATA packet that writes @val to register @reg. */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
                                  uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}
6446
6447 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6448                                                  enum amdgpu_interrupt_state state)
6449 {
6450         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6451                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6452 }
6453
6454 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6455                                                      int me, int pipe,
6456                                                      enum amdgpu_interrupt_state state)
6457 {
6458         u32 mec_int_cntl, mec_int_cntl_reg;
6459
6460         /*
6461          * amdgpu controls only the first MEC. That's why this function only
6462          * handles the setting of interrupts for this specific MEC. All other
6463          * pipes' interrupts are set by amdkfd.
6464          */
6465
6466         if (me == 1) {
6467                 switch (pipe) {
6468                 case 0:
6469                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6470                         break;
6471                 case 1:
6472                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6473                         break;
6474                 case 2:
6475                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6476                         break;
6477                 case 3:
6478                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6479                         break;
6480                 default:
6481                         DRM_DEBUG("invalid pipe %d\n", pipe);
6482                         return;
6483                 }
6484         } else {
6485                 DRM_DEBUG("invalid me %d\n", me);
6486                 return;
6487         }
6488
6489         switch (state) {
6490         case AMDGPU_IRQ_STATE_DISABLE:
6491                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6492                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6493                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6494                 break;
6495         case AMDGPU_IRQ_STATE_ENABLE:
6496                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6497                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6498                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6499                 break;
6500         default:
6501                 break;
6502         }
6503 }
6504
6505 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6506                                              struct amdgpu_irq_src *source,
6507                                              unsigned type,
6508                                              enum amdgpu_interrupt_state state)
6509 {
6510         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6511                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6512
6513         return 0;
6514 }
6515
6516 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6517                                               struct amdgpu_irq_src *source,
6518                                               unsigned type,
6519                                               enum amdgpu_interrupt_state state)
6520 {
6521         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6522                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6523
6524         return 0;
6525 }
6526
/* Dispatch an EOP interrupt enable/disable request to the GFX ring or
 * to the matching compute MEC/pipe.  Unknown types are ignored.
 * Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
                                            struct amdgpu_irq_src *src,
                                            unsigned type,
                                            enum amdgpu_interrupt_state state)
{
        switch (type) {
        case AMDGPU_CP_IRQ_GFX_EOP:
                gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
                break;
        default:
                break;
        }
        return 0;
}
6565
/* EOP interrupt handler: decode me/pipe/queue from the IV ring_id and
 * run fence processing on the ring(s) that signalled.  me 0 is the
 * single GFX ring; me 1/2 are the compute MECs, matched against each
 * compute ring's me/pipe/queue.  Always returns 0.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
                            struct amdgpu_irq_src *source,
                            struct amdgpu_iv_entry *entry)
{
        int i;
        u8 me_id, pipe_id, queue_id;
        struct amdgpu_ring *ring;

        DRM_DEBUG("IH: CP EOP\n");
        /* ring_id layout: queue in bits 6:4, me in bits 3:2, pipe in bits 1:0 */
        me_id = (entry->ring_id & 0x0c) >> 2;
        pipe_id = (entry->ring_id & 0x03) >> 0;
        queue_id = (entry->ring_id & 0x70) >> 4;

        switch (me_id) {
        case 0:
                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
                break;
        case 1:
        case 2:
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        ring = &adev->gfx.compute_ring[i];
                        /* Per-queue interrupt is supported for MEC starting from VI.
                         * The interrupt can only be enabled/disabled per pipe instead
                         * of per queue.
                         */
                        if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
                                amdgpu_fence_process(ring);
                }
                break;
        }
        return 0;
}
6597
/* Privileged-register fault handler: log the violation and schedule a
 * GPU reset from process context.  Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6606
/* Privileged-instruction fault handler: log the violation and schedule
 * a GPU reset from process context.  Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6615
/* Enable or disable the KIQ's GENERIC2 interrupt, both globally in
 * CPC_INT_CNTL and in the per-pipe INT_CNTL register of whichever
 * MEC/pipe the KIQ ring lives on.  Only AMDGPU_CP_KIQ_IRQ_DRIVER0 is
 * supported; anything else is a driver bug.  Always returns 0.
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
                                            struct amdgpu_irq_src *src,
                                            unsigned int type,
                                            enum amdgpu_interrupt_state state)
{
        struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

        switch (type) {
        case AMDGPU_CP_KIQ_IRQ_DRIVER0:
                WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
                             state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
                if (ring->me == 1)
                        WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
                                     ring->pipe,
                                     GENERIC2_INT_ENABLE,
                                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
                else
                        WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
                                     ring->pipe,
                                     GENERIC2_INT_ENABLE,
                                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
                break;
        default:
                BUG(); /* kiq only support GENERIC2_INT now */
                break;
        }
        return 0;
}
6644
/* KIQ GENERIC2 interrupt handler: decode me/pipe/queue for debugging
 * and run fence processing on the KIQ ring.  Always returns 0.
 */
static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
                            struct amdgpu_irq_src *source,
                            struct amdgpu_iv_entry *entry)
{
        u8 me_id, pipe_id, queue_id;
        struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

        /* same ring_id layout as gfx_v8_0_eop_irq() */
        me_id = (entry->ring_id & 0x0c) >> 2;
        pipe_id = (entry->ring_id & 0x03) >> 0;
        queue_id = (entry->ring_id & 0x70) >> 4;
        DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
                   me_id, pipe_id, queue_id);

        amdgpu_fence_process(ring);
        return 0;
}
6661
/* IP-block level lifecycle and power/clock-gating callbacks for the
 * GFX 8.x hardware block.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
        .late_init = gfx_v8_0_late_init,
        .sw_init = gfx_v8_0_sw_init,
        .sw_fini = gfx_v8_0_sw_fini,
        .hw_init = gfx_v8_0_hw_init,
        .hw_fini = gfx_v8_0_hw_fini,
        .suspend = gfx_v8_0_suspend,
        .resume = gfx_v8_0_resume,
        .is_idle = gfx_v8_0_is_idle,
        .wait_for_idle = gfx_v8_0_wait_for_idle,
        .check_soft_reset = gfx_v8_0_check_soft_reset,
        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
        .soft_reset = gfx_v8_0_soft_reset,
        .post_soft_reset = gfx_v8_0_post_soft_reset,
        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
        .set_powergating_state = gfx_v8_0_set_powergating_state,
        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6682
/* Ring callbacks for the GFX ring.  emit_frame_size is the worst-case
 * DW budget reserved per frame for the packets emitted below.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
        .type = AMDGPU_RING_TYPE_GFX,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
        .emit_frame_size = /* maximum 215dw if count 16 IBs in */
                5 +  /* COND_EXEC */
                7 +  /* PIPELINE_SYNC */
                19 + /* VM_FLUSH */
                8 +  /* FENCE for VM_FLUSH */
                20 + /* GDS switch */
                4 + /* double SWITCH_BUFFER,
                       the first COND_EXEC jump to the place just
                           prior to this double SWITCH_BUFFER  */
                5 + /* COND_EXEC */
                7 +      /*     HDP_flush */
                4 +      /*     VGT_flush */
                14 + /* CE_META */
                31 + /* DE_META */
                3 + /* CNTX_CTRL */
                5 + /* HDP_INVL */
                8 + 8 + /* FENCE x2 */
                2, /* SWITCH_BUFFER */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_switch_buffer = gfx_v8_ring_emit_sb,
        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
6726
/* Ring callbacks for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
        .type = AMDGPU_RING_TYPE_COMPUTE,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
};
6755
/* Ring callbacks for the kernel interface queue (KIQ).  Shares the
 * compute wptr/IB helpers but uses the KIQ-specific fence and adds
 * in-ring register read/write emission.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v8_0_ring_emit_rreg,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6781
6782 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6783 {
6784         int i;
6785
6786         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6787
6788         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6789                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6790
6791         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6792                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6793 }
6794
/* IRQ source callbacks for CP end-of-pipe interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
        .set = gfx_v8_0_set_eop_interrupt_state,
        .process = gfx_v8_0_eop_irq,
};
6799
/* IRQ source callbacks for privileged-register fault interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
        .set = gfx_v8_0_set_priv_reg_fault_state,
        .process = gfx_v8_0_priv_reg_irq,
};
6804
/* IRQ source callbacks for privileged-instruction fault interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
        .set = gfx_v8_0_set_priv_inst_fault_state,
        .process = gfx_v8_0_priv_inst_irq,
};
6809
/* IRQ source callbacks for the KIQ GENERIC2 interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
        .set = gfx_v8_0_kiq_set_interrupt_state,
        .process = gfx_v8_0_kiq_irq,
};
6814
/* Register the GFX 8 IRQ source handler tables and their type counts
 * on the device.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

        adev->gfx.priv_reg_irq.num_types = 1;
        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

        adev->gfx.priv_inst_irq.num_types = 1;
        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

        adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
        adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
6829
/* Install the RLC callbacks; the Iceland table is used unconditionally
 * for every chip handled by this file.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
6834
/* Initialize per-ASIC GDS (global data share) sizes and carve out the
 * gfx vs. compute (cs) partitions; the split depends on whether the
 * part reports a 64KB GDS.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
        /* init asic gds info */
        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
        adev->gds.gws.total_size = 64;
        adev->gds.oa.total_size = 16;

        if (adev->gds.mem.total_size == 64 * 1024) {
                adev->gds.mem.gfx_partition_size = 4096;
                adev->gds.mem.cs_partition_size = 4096;

                adev->gds.gws.gfx_partition_size = 4;
                adev->gds.gws.cs_partition_size = 4;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 1;
        } else {
                adev->gds.mem.gfx_partition_size = 1024;
                adev->gds.mem.cs_partition_size = 1024;

                adev->gds.gws.gfx_partition_size = 16;
                adev->gds.gws.cs_partition_size = 16;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 4;
        }
}
6862
6863 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6864                                                  u32 bitmap)
6865 {
6866         u32 data;
6867
6868         if (!bitmap)
6869                 return;
6870
6871         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6872         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6873
6874         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6875 }
6876
6877 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6878 {
6879         u32 data, mask;
6880
6881         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6882                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6883
6884         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6885
6886         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6887 }
6888
/* Populate adev->gfx.cu_info: walk every shader engine / shader array
 * under grbm_idx_mutex, apply the module-parameter CU disable masks,
 * record the active-CU bitmap per SE/SH, count active CUs, and build
 * the always-on (AO) CU mask (capped at @ao_cu_num per SH; APUs keep
 * only 2 always-on CUs).
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
        int i, j, k, counter, active_cu_number = 0;
        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
        unsigned disable_masks[4 * 2];
        u32 ao_cu_num;

        memset(cu_info, 0, sizeof(*cu_info));

        if (adev->flags & AMD_IS_APU)
                ao_cu_num = 2;
        else
                ao_cu_num = adev->gfx.config.max_cu_per_sh;

        amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        /* disable_masks only covers 4 SEs x 2 SHs */
                        if (i < 4 && j < 2)
                                gfx_v8_0_set_user_cu_inactive_bitmap(
                                        adev, disable_masks[i * 2 + j]);
                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
                        cu_info->bitmap[i][j] = bitmap;

                        /* count active CUs; first ao_cu_num become always-on */
                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
                                if (bitmap & mask) {
                                        if (counter < ao_cu_num)
                                                ao_bitmap |= mask;
                                        counter ++;
                                }
                                mask <<= 1;
                        }
                        active_cu_number += counter;
                        /* packed ao_cu_mask only has room for 2 SEs x 2 SHs */
                        if (i < 2 && j < 2)
                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
                        cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
                }
        }
        /* restore broadcast selection */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
}
6939
/* IP block descriptor registered for GFX 8.0 parts. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
6948
/* IP block descriptor for GFX 8.1 parts; shares the 8.0 function table. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
6957
6958 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
6959 {
6960         uint64_t ce_payload_addr;
6961         int cnt_ce;
6962         static union {
6963                 struct vi_ce_ib_state regular;
6964                 struct vi_ce_ib_state_chained_ib chained;
6965         } ce_payload = {};
6966
6967         if (ring->adev->virt.chained_ib_support) {
6968                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
6969                                                   offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
6970                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
6971         } else {
6972                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
6973                                                   offsetof(struct vi_gfx_meta_data, ce_payload);
6974                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
6975         }
6976
6977         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
6978         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
6979                                 WRITE_DATA_DST_SEL(8) |
6980                                 WR_CONFIRM) |
6981                                 WRITE_DATA_CACHE_POLICY(0));
6982         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
6983         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
6984         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
6985 }
6986
6987 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
6988 {
6989         uint64_t de_payload_addr, gds_addr, csa_addr;
6990         int cnt_de;
6991         static union {
6992                 struct vi_de_ib_state regular;
6993                 struct vi_de_ib_state_chained_ib chained;
6994         } de_payload = {};
6995
6996         csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
6997         gds_addr = csa_addr + 4096;
6998         if (ring->adev->virt.chained_ib_support) {
6999                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7000                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7001                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7002                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7003         } else {
7004                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7005                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7006                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7007                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7008         }
7009
7010         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7011         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7012                                 WRITE_DATA_DST_SEL(8) |
7013                                 WR_CONFIRM) |
7014                                 WRITE_DATA_CACHE_POLICY(0));
7015         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7016         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7017         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7018 }
This page took 0.464195 seconds and 4 git commands to generate.