Git repo: linux.git — blob: drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
Commit subject: drm/amdgpu: fix comment on amdgpu_bo_va
[linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include <drm/drmP.h>
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
53 #define GFX8_NUM_GFX_RINGS     1
54 #define GFX8_MEC_HPD_SIZE 2048
55
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60
61 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77
78 /* BPM SERDES CMD */
79 #define SET_BPM_SERDES_CMD    1
80 #define CLE_BPM_SERDES_CMD    0
81
/*
 * BPM register addresses.
 * NOTE(review): presumably register indices accessed through the BPM
 * SERDES commands defined above (SET/CLE_BPM_SERDES_CMD) — confirm
 * against the serdes write helpers later in this file.
 */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX            /* number of BPM registers */
};
91
92 #define RLC_FormatDirectRegListLength        14
93
94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
100
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140
141 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
142 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
143 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
144 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
145 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
147
/*
 * Per-VMID GDS register offsets: one row for each of VMID 0-15, giving
 * the {GDS base, GDS size, GWS, OA} register offsets for that VMID.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
167
/*
 * Tonga (rev A11) golden register settings, laid out as flat
 * {register, mask, value} triples and applied via
 * amdgpu_program_register_sequence() in gfx_v8_0_init_golden_registers().
 * NOTE(review): mask presumably selects which bits of the register are
 * rewritten with the value — confirm against that helper.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
187
/* Tonga: common golden settings ({register, mask, value} triples) */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
199
/*
 * Tonga: MGCG/CGCG (medium/coarse grain clock gating) init sequence,
 * {register, mask, value} triples. Bulk of the table programs the
 * per-CU CGTS control registers for CU0-CU7.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
278
/* Polaris11 (rev A11) golden settings ({register, mask, value} triples) */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
299
/* Polaris11: common golden settings ({register, mask, value} triples) */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
309
/* Polaris10 (rev A11) golden settings ({register, mask, value} triples) */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
330
/* Polaris10: common golden settings ({register, mask, value} triples) */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
342
/* Fiji: common golden settings ({register, mask, value} triples) */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
356
/* Fiji (rev A10) golden settings ({register, mask, value} triples) */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
371
/*
 * Fiji: MGCG/CGCG clock-gating init sequence ({register, mask, value}
 * triples). Unlike the Tonga table, this one carries no per-CU CGTS
 * entries.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
410
/* Iceland/Topaz (rev A11) golden settings ({register, mask, value} triples) */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
430
/* Iceland/Topaz: common golden settings ({register, mask, value} triples) */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
442
/*
 * Iceland/Topaz: MGCG/CGCG clock-gating init sequence
 * ({register, mask, value} triples), including per-CU CGTS entries for
 * CU0-CU5 only (fewer CUs than Tonga/Carrizo tables).
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
510
/* Carrizo (rev A11) golden settings ({register, mask, value} triples) */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
526
/* Carrizo: common golden settings ({register, mask, value} triples) */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
538
/*
 * Carrizo: MGCG/CGCG clock-gating init sequence ({register, mask, value}
 * triples), with per-CU CGTS entries for CU0-CU7.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
617
/* Stoney (rev A11) golden settings ({register, mask, value} triples) */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
631
/* Stoney common golden registers: triples of (register, AND mask, OR value)
 * applied after the per-revision settings above. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
643
/* Stoney medium-grain / coarse-grain clockgating init sequence:
 * triples of (register, AND mask, OR value). */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
652
/* Forward declarations for helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
661
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 *
 * Applies the clockgating init sequence and golden settings tables for the
 * detected ASIC via amdgpu_program_register_sequence().  Unknown ASIC types
 * are silently ignored.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific quirk for certain Polaris10 revision 0xc7 SKUs
		 * (matched by subsystem vendor/device): issue two I2C channel
		 * transactions.  Purpose of the transactions is not visible
		 * here — presumably a board-level fixup; confirm against the
		 * originating commit before changing. */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
750
751 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
752 {
753         adev->gfx.scratch.num_reg = 8;
754         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
755         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
756 }
757
/*
 * gfx_v8_0_ring_test_ring - basic CP ring liveness test
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a 3-dword
 * SET_UCONFIG_REG packet on @ring that rewrites it to 0xDEADBEEF, and
 * polls for up to adev->usec_timeout microseconds for the write to land.
 *
 * Returns 0 on success, -EINVAL on timeout, or a negative error from
 * scratch allocation / ring locking.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* sentinel the packet below must overwrite */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-wait for the CP to execute the register write */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
801
/*
 * gfx_v8_0_ring_test_ib - test indirect-buffer submission on a ring
 *
 * Same write-and-poll scheme as the ring test, but the SET_UCONFIG_REG
 * packet is placed in an IB and scheduled through the full submission
 * path; completion is detected by waiting on the returned fence rather
 * than by polling a loop counter.
 *
 * @timeout: jiffies to wait for the fence (passed to
 *           dma_fence_wait_timeout()).
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signals, -EINVAL
 * if the scratch value is wrong, or a negative error from any setup step.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* sentinel the IB must overwrite */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	/* dma_fence_wait_timeout(): 0 = timeout, <0 = error, >0 = signaled */
	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
857
858
/*
 * gfx_v8_0_free_microcode - drop all GFX firmware references
 *
 * Releases the PFP/ME/CE/RLC/MEC images requested by
 * gfx_v8_0_init_microcode() and frees the RLC register-list buffer.
 * MEC2 is only requested on parts other than Stoney/Topaz, hence the
 * guard (release_firmware(NULL) would be a no-op anyway, so the guard
 * is belt-and-braces); the pointer is cleared unconditionally.
 */
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
878
879 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
880 {
881         const char *chip_name;
882         char fw_name[30];
883         int err;
884         struct amdgpu_firmware_info *info = NULL;
885         const struct common_firmware_header *header = NULL;
886         const struct gfx_firmware_header_v1_0 *cp_hdr;
887         const struct rlc_firmware_header_v2_0 *rlc_hdr;
888         unsigned int *tmp = NULL, i;
889
890         DRM_DEBUG("\n");
891
892         switch (adev->asic_type) {
893         case CHIP_TOPAZ:
894                 chip_name = "topaz";
895                 break;
896         case CHIP_TONGA:
897                 chip_name = "tonga";
898                 break;
899         case CHIP_CARRIZO:
900                 chip_name = "carrizo";
901                 break;
902         case CHIP_FIJI:
903                 chip_name = "fiji";
904                 break;
905         case CHIP_POLARIS11:
906                 chip_name = "polaris11";
907                 break;
908         case CHIP_POLARIS10:
909                 chip_name = "polaris10";
910                 break;
911         case CHIP_POLARIS12:
912                 chip_name = "polaris12";
913                 break;
914         case CHIP_STONEY:
915                 chip_name = "stoney";
916                 break;
917         default:
918                 BUG();
919         }
920
921         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
922                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
923                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
924                 if (err == -ENOENT) {
925                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
926                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
927                 }
928         } else {
929                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
930                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
931         }
932         if (err)
933                 goto out;
934         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
935         if (err)
936                 goto out;
937         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
938         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
939         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
940
941         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
942                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
943                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
944                 if (err == -ENOENT) {
945                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
946                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
947                 }
948         } else {
949                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
950                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
951         }
952         if (err)
953                 goto out;
954         err = amdgpu_ucode_validate(adev->gfx.me_fw);
955         if (err)
956                 goto out;
957         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
958         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
959
960         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
961
962         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
963                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
964                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
965                 if (err == -ENOENT) {
966                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
967                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
968                 }
969         } else {
970                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
971                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
972         }
973         if (err)
974                 goto out;
975         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
976         if (err)
977                 goto out;
978         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
979         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
980         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
981
982         /*
983          * Support for MCBP/Virtualization in combination with chained IBs is
984          * formal released on feature version #46
985          */
986         if (adev->gfx.ce_feature_version >= 46 &&
987             adev->gfx.pfp_feature_version >= 46) {
988                 adev->virt.chained_ib_support = true;
989                 DRM_INFO("Chained IB support enabled!\n");
990         } else
991                 adev->virt.chained_ib_support = false;
992
993         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
994         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
995         if (err)
996                 goto out;
997         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
998         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
999         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1000         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1001
1002         adev->gfx.rlc.save_and_restore_offset =
1003                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1004         adev->gfx.rlc.clear_state_descriptor_offset =
1005                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1006         adev->gfx.rlc.avail_scratch_ram_locations =
1007                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1008         adev->gfx.rlc.reg_restore_list_size =
1009                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1010         adev->gfx.rlc.reg_list_format_start =
1011                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1012         adev->gfx.rlc.reg_list_format_separate_start =
1013                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1014         adev->gfx.rlc.starting_offsets_start =
1015                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1016         adev->gfx.rlc.reg_list_format_size_bytes =
1017                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1018         adev->gfx.rlc.reg_list_size_bytes =
1019                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1020
1021         adev->gfx.rlc.register_list_format =
1022                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1023                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1024
1025         if (!adev->gfx.rlc.register_list_format) {
1026                 err = -ENOMEM;
1027                 goto out;
1028         }
1029
1030         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1031                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1032         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1033                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1034
1035         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1036
1037         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1038                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1039         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1040                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1041
1042         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1043                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1044                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1045                 if (err == -ENOENT) {
1046                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1047                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1048                 }
1049         } else {
1050                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1051                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1052         }
1053         if (err)
1054                 goto out;
1055         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1056         if (err)
1057                 goto out;
1058         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1059         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1060         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1061
1062         if ((adev->asic_type != CHIP_STONEY) &&
1063             (adev->asic_type != CHIP_TOPAZ)) {
1064                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1065                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1066                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1067                         if (err == -ENOENT) {
1068                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1069                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1070                         }
1071                 } else {
1072                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1073                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1074                 }
1075                 if (!err) {
1076                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1077                         if (err)
1078                                 goto out;
1079                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1080                                 adev->gfx.mec2_fw->data;
1081                         adev->gfx.mec2_fw_version =
1082                                 le32_to_cpu(cp_hdr->header.ucode_version);
1083                         adev->gfx.mec2_feature_version =
1084                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1085                 } else {
1086                         err = 0;
1087                         adev->gfx.mec2_fw = NULL;
1088                 }
1089         }
1090
1091         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1092                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1093                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1094                 info->fw = adev->gfx.pfp_fw;
1095                 header = (const struct common_firmware_header *)info->fw->data;
1096                 adev->firmware.fw_size +=
1097                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1098
1099                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1100                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1101                 info->fw = adev->gfx.me_fw;
1102                 header = (const struct common_firmware_header *)info->fw->data;
1103                 adev->firmware.fw_size +=
1104                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1105
1106                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1107                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1108                 info->fw = adev->gfx.ce_fw;
1109                 header = (const struct common_firmware_header *)info->fw->data;
1110                 adev->firmware.fw_size +=
1111                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1112
1113                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1114                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1115                 info->fw = adev->gfx.rlc_fw;
1116                 header = (const struct common_firmware_header *)info->fw->data;
1117                 adev->firmware.fw_size +=
1118                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1119
1120                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1121                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1122                 info->fw = adev->gfx.mec_fw;
1123                 header = (const struct common_firmware_header *)info->fw->data;
1124                 adev->firmware.fw_size +=
1125                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1126
1127                 /* we need account JT in */
1128                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1129                 adev->firmware.fw_size +=
1130                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1131
1132                 if (amdgpu_sriov_vf(adev)) {
1133                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1134                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1135                         info->fw = adev->gfx.mec_fw;
1136                         adev->firmware.fw_size +=
1137                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1138                 }
1139
1140                 if (adev->gfx.mec2_fw) {
1141                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1142                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1143                         info->fw = adev->gfx.mec2_fw;
1144                         header = (const struct common_firmware_header *)info->fw->data;
1145                         adev->firmware.fw_size +=
1146                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1147                 }
1148
1149         }
1150
1151 out:
1152         if (err) {
1153                 dev_err(adev->dev,
1154                         "gfx8: Failed to load firmware \"%s\"\n",
1155                         fw_name);
1156                 release_firmware(adev->gfx.pfp_fw);
1157                 adev->gfx.pfp_fw = NULL;
1158                 release_firmware(adev->gfx.me_fw);
1159                 adev->gfx.me_fw = NULL;
1160                 release_firmware(adev->gfx.ce_fw);
1161                 adev->gfx.ce_fw = NULL;
1162                 release_firmware(adev->gfx.rlc_fw);
1163                 adev->gfx.rlc_fw = NULL;
1164                 release_firmware(adev->gfx.mec_fw);
1165                 adev->gfx.mec_fw = NULL;
1166                 release_firmware(adev->gfx.mec2_fw);
1167                 adev->gfx.mec2_fw = NULL;
1168         }
1169         return err;
1170 }
1171
/*
 * gfx_v8_0_get_csb_buffer - emit the clear-state indirect buffer
 *
 * Fills @buffer with the PM4 packet stream that programs the GPU's
 * default (clear) state: preamble begin, CONTEXT_CONTROL, every
 * SECT_CONTEXT register extent from adev->gfx.rlc.cs_data, the raster
 * config pair, preamble end, and a final CLEAR_STATE packet.  The
 * caller must size @buffer via gfx_v8_0_get_csb_size().  Bails out
 * early if either the cs_data table or @buffer is missing.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per register extent; only context
	 * sections are expected — anything else aborts the build */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* raster config values were computed during GB setup */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1218
1219 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1220 {
1221         const __le32 *fw_data;
1222         volatile u32 *dst_ptr;
1223         int me, i, max_me = 4;
1224         u32 bo_offset = 0;
1225         u32 table_offset, table_size;
1226
1227         if (adev->asic_type == CHIP_CARRIZO)
1228                 max_me = 5;
1229
1230         /* write the cp table buffer */
1231         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1232         for (me = 0; me < max_me; me++) {
1233                 if (me == 0) {
1234                         const struct gfx_firmware_header_v1_0 *hdr =
1235                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1236                         fw_data = (const __le32 *)
1237                                 (adev->gfx.ce_fw->data +
1238                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1239                         table_offset = le32_to_cpu(hdr->jt_offset);
1240                         table_size = le32_to_cpu(hdr->jt_size);
1241                 } else if (me == 1) {
1242                         const struct gfx_firmware_header_v1_0 *hdr =
1243                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1244                         fw_data = (const __le32 *)
1245                                 (adev->gfx.pfp_fw->data +
1246                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1247                         table_offset = le32_to_cpu(hdr->jt_offset);
1248                         table_size = le32_to_cpu(hdr->jt_size);
1249                 } else if (me == 2) {
1250                         const struct gfx_firmware_header_v1_0 *hdr =
1251                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1252                         fw_data = (const __le32 *)
1253                                 (adev->gfx.me_fw->data +
1254                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1255                         table_offset = le32_to_cpu(hdr->jt_offset);
1256                         table_size = le32_to_cpu(hdr->jt_size);
1257                 } else if (me == 3) {
1258                         const struct gfx_firmware_header_v1_0 *hdr =
1259                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1260                         fw_data = (const __le32 *)
1261                                 (adev->gfx.mec_fw->data +
1262                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1263                         table_offset = le32_to_cpu(hdr->jt_offset);
1264                         table_size = le32_to_cpu(hdr->jt_size);
1265                 } else  if (me == 4) {
1266                         const struct gfx_firmware_header_v1_0 *hdr =
1267                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1268                         fw_data = (const __le32 *)
1269                                 (adev->gfx.mec2_fw->data +
1270                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1271                         table_offset = le32_to_cpu(hdr->jt_offset);
1272                         table_size = le32_to_cpu(hdr->jt_size);
1273                 }
1274
1275                 for (i = 0; i < table_size; i ++) {
1276                         dst_ptr[bo_offset + i] =
1277                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1278                 }
1279
1280                 bo_offset += table_size;
1281         }
1282 }
1283
/* Release the RLC buffer objects: the clear-state BO and the CP jump-table BO.
 * amdgpu_bo_free_kernel() tolerates NULL/already-freed handles, so this is
 * safe to call from partial-init error paths.
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
        amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1289
1290 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1291 {
1292         volatile u32 *dst_ptr;
1293         u32 dws;
1294         const struct cs_section_def *cs_data;
1295         int r;
1296
1297         adev->gfx.rlc.cs_data = vi_cs_data;
1298
1299         cs_data = adev->gfx.rlc.cs_data;
1300
1301         if (cs_data) {
1302                 /* clear state block */
1303                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1304
1305                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1306                                               AMDGPU_GEM_DOMAIN_VRAM,
1307                                               &adev->gfx.rlc.clear_state_obj,
1308                                               &adev->gfx.rlc.clear_state_gpu_addr,
1309                                               (void **)&adev->gfx.rlc.cs_ptr);
1310                 if (r) {
1311                         dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1312                         gfx_v8_0_rlc_fini(adev);
1313                         return r;
1314                 }
1315
1316                 /* set up the cs buffer */
1317                 dst_ptr = adev->gfx.rlc.cs_ptr;
1318                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1319                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1320                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1321         }
1322
1323         if ((adev->asic_type == CHIP_CARRIZO) ||
1324             (adev->asic_type == CHIP_STONEY)) {
1325                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1326                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1327                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1328                                               &adev->gfx.rlc.cp_table_obj,
1329                                               &adev->gfx.rlc.cp_table_gpu_addr,
1330                                               (void **)&adev->gfx.rlc.cp_table_ptr);
1331                 if (r) {
1332                         dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1333                         return r;
1334                 }
1335
1336                 cz_init_cp_jump_table(adev);
1337
1338                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1339                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1340         }
1341
1342         return 0;
1343 }
1344
/* Release the MEC HPD EOP buffer object allocated by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1349
1350 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1351 {
1352         int r;
1353         u32 *hpd;
1354         size_t mec_hpd_size;
1355
1356         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1357
1358         /* take ownership of the relevant compute queues */
1359         amdgpu_gfx_compute_queue_acquire(adev);
1360
1361         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1362
1363         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1364                                       AMDGPU_GEM_DOMAIN_GTT,
1365                                       &adev->gfx.mec.hpd_eop_obj,
1366                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1367                                       (void **)&hpd);
1368         if (r) {
1369                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1370                 return r;
1371         }
1372
1373         memset(hpd, 0, mec_hpd_size);
1374
1375         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1376         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1377
1378         return 0;
1379 }
1380
/*
 * Raw GCN3 shader dwords for the VGPR-init dispatch issued by
 * gfx_v8_0_do_edc_gpr_workarounds().  Presumably a straight-line run of
 * VGPR writes terminated by a wait/end-of-program pair -- TODO confirm
 * against the GCN3 ISA encoding.
 */
static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000,
};
1417
/*
 * Raw GCN3 shader dwords for the two SGPR-init dispatches issued by
 * gfx_v8_0_do_edc_gpr_workarounds() (the same binary is run with two
 * different STATIC_THREAD_MGMT masks).  Presumably SGPR writes followed
 * by a wait/end-of-program pair -- TODO confirm against the GCN3 ISA.
 */
static const u32 sgpr_init_compute_shader[] =
{
        0xbe8a0100, 0xbe8c0102,
        0xbe8e0104, 0xbe900106,
        0xbe920108, 0xbe940100,
        0xbe960102, 0xbe980104,
        0xbe9a0106, 0xbe9c0108,
        0xbe9e0100, 0xbea00102,
        0xbea20104, 0xbea40106,
        0xbea60108, 0xbea80100,
        0xbeaa0102, 0xbeac0104,
        0xbeae0106, 0xbeb00108,
        0xbeb20100, 0xbeb40102,
        0xbeb60104, 0xbeb80106,
        0xbeba0108, 0xbebc0100,
        0xbebe0102, 0xbec00104,
        0xbec20106, 0xbec40108,
        0xbec60100, 0xbec80102,
        0xbee60004, 0xbee70005,
        0xbeea0006, 0xbeeb0007,
        0xbee80008, 0xbee90009,
        0xbefc0000, 0xbf8a0000,
        0xbf810000, 0x00000000,
};
1442
/*
 * (SH register, value) pairs written via PACKET3_SET_SH_REG before the
 * VGPR-init dispatch in gfx_v8_0_do_edc_gpr_workarounds().  The
 * COMPUTE_USER_DATA_* values are recognizable 0xedcedcNN markers.
 */
static const u32 vgpr_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
        mmCOMPUTE_RESOURCE_LIMITS, 0,
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1462
/*
 * (SH register, value) pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  STATIC_THREAD_MGMT_SE0 = 0x0f
 * selects one half of the units (complement of sgpr2_init_regs' 0xf0)
 * -- TODO confirm exact mask semantics against the register spec.
 */
static const u32 sgpr1_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1482
/*
 * (SH register, value) pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Identical to sgpr1_init_regs
 * except STATIC_THREAD_MGMT_SE0 = 0xf0 (the complementary mask).
 */
static const u32 sgpr2_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1502
/*
 * EDC SEC/DED error-counter registers.  Read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
        mmCPC_EDC_ATC_CNT,
        mmCPC_EDC_SCRATCH_CNT,
        mmCPC_EDC_UCODE_CNT,
        mmCPF_EDC_ATC_CNT,
        mmCPF_EDC_ROQ_CNT,
        mmCPF_EDC_TAG_CNT,
        mmCPG_EDC_ATC_CNT,
        mmCPG_EDC_DMA_CNT,
        mmCPG_EDC_TAG_CNT,
        mmDC_EDC_CSINVOC_CNT,
        mmDC_EDC_RESTORE_CNT,
        mmDC_EDC_STATE_CNT,
        mmGDS_EDC_CNT,
        mmGDS_EDC_GRBM_CNT,
        mmGDS_EDC_OA_DED,
        mmSPI_EDC_CNT,
        mmSQC_ATC_EDC_GATCL1_CNT,
        mmSQC_EDC_CNT,
        mmSQ_EDC_DED_CNT,
        mmSQ_EDC_INFO,
        mmSQ_EDC_SEC_CNT,
        mmTCC_EDC_CNT,
        mmTCP_ATC_EDC_GATCL1_CNT,
        mmTCP_EDC_CNT,
        mmTD_EDC_CNT
};
1531
/*
 * gfx_v8_0_do_edc_gpr_workarounds - GPR EDC init workaround (Carrizo only)
 *
 * Builds one IB containing three compute dispatches -- one running the
 * VGPR-init shader and two running the SGPR-init shader with
 * complementary STATIC_THREAD_MGMT masks -- submits it on compute ring
 * 0 and waits for completion.  Afterwards it enables the EDC modes in
 * GB_EDC_MODE / CC_GC_EDC_CONFIG and reads back the SEC/DED counter
 * registers to clear them.
 *
 * No-op (returns 0) unless the ASIC is Carrizo and compute ring 0 is
 * ready.  Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        int r, i;
        u32 tmp;
        unsigned total_size, vgpr_offset, sgpr_offset;
        u64 gpu_addr;

        /* only supported on CZ */
        if (adev->asic_type != CHIP_CARRIZO)
                return 0;

        /* bail if the compute ring is not ready */
        if (!ring->ready)
                return 0;

        /* save GB_EDC_MODE in tmp and disable EDC while the shaders run */
        tmp = RREG32(mmGB_EDC_MODE);
        WREG32(mmGB_EDC_MODE, 0);

        /* per dispatch: (pairs * 3) dw of SET_SH_REG writes, 4 dw for
         * PGM_LO/HI, 5 dw for DISPATCH_DIRECT, 2 dw for EVENT_WRITE;
         * times 4 to convert dwords to bytes.
         */
        total_size =
                (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        /* shaders live in the same IB, 256-byte aligned after the packets */
        total_size = ALIGN(total_size, 256);
        vgpr_offset = total_size;
        total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
        sgpr_offset = total_size;
        total_size += sizeof(sgpr_init_compute_shader);

        /* allocate an indirect buffer to put the commands in */
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, total_size, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
                return r;
        }

        /* load the compute shaders */
        for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
                ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

        for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
                ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

        /* init the ib length to 0 */
        ib.length_dw = 0;

        /* VGPR */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* SGPR1 */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* SGPR2 */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* schedule the ib on the ring */
        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r) {
                DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
                goto fail;
        }

        /* wait for the GPU to finish processing the IB */
        r = dma_fence_wait(f, false);
        if (r) {
                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
                goto fail;
        }

        /* restore the saved GB_EDC_MODE with DED_MODE=2 and PROP_FED set */
        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
        WREG32(mmGB_EDC_MODE, tmp);

        tmp = RREG32(mmCC_GC_EDC_CONFIG);
        /* NOTE(review): the trailing "| 1" sets bit 0 on top of clearing
         * DIS_EDC -- intent unclear, verify against the register spec.
         */
        tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
        WREG32(mmCC_GC_EDC_CONFIG, tmp);


        /* read back registers to clear the counters */
        for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
                RREG32(sec_ded_counter_registers[i]);

fail:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);

        return r;
}
1694
1695 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1696 {
1697         u32 gb_addr_config;
1698         u32 mc_shared_chmap, mc_arb_ramcfg;
1699         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1700         u32 tmp;
1701         int ret;
1702
1703         switch (adev->asic_type) {
1704         case CHIP_TOPAZ:
1705                 adev->gfx.config.max_shader_engines = 1;
1706                 adev->gfx.config.max_tile_pipes = 2;
1707                 adev->gfx.config.max_cu_per_sh = 6;
1708                 adev->gfx.config.max_sh_per_se = 1;
1709                 adev->gfx.config.max_backends_per_se = 2;
1710                 adev->gfx.config.max_texture_channel_caches = 2;
1711                 adev->gfx.config.max_gprs = 256;
1712                 adev->gfx.config.max_gs_threads = 32;
1713                 adev->gfx.config.max_hw_contexts = 8;
1714
1715                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1716                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1717                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1718                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1719                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1720                 break;
1721         case CHIP_FIJI:
1722                 adev->gfx.config.max_shader_engines = 4;
1723                 adev->gfx.config.max_tile_pipes = 16;
1724                 adev->gfx.config.max_cu_per_sh = 16;
1725                 adev->gfx.config.max_sh_per_se = 1;
1726                 adev->gfx.config.max_backends_per_se = 4;
1727                 adev->gfx.config.max_texture_channel_caches = 16;
1728                 adev->gfx.config.max_gprs = 256;
1729                 adev->gfx.config.max_gs_threads = 32;
1730                 adev->gfx.config.max_hw_contexts = 8;
1731
1732                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1733                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1734                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1735                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1736                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1737                 break;
1738         case CHIP_POLARIS11:
1739         case CHIP_POLARIS12:
1740                 ret = amdgpu_atombios_get_gfx_info(adev);
1741                 if (ret)
1742                         return ret;
1743                 adev->gfx.config.max_gprs = 256;
1744                 adev->gfx.config.max_gs_threads = 32;
1745                 adev->gfx.config.max_hw_contexts = 8;
1746
1747                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1748                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1749                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1750                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1751                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1752                 break;
1753         case CHIP_POLARIS10:
1754                 ret = amdgpu_atombios_get_gfx_info(adev);
1755                 if (ret)
1756                         return ret;
1757                 adev->gfx.config.max_gprs = 256;
1758                 adev->gfx.config.max_gs_threads = 32;
1759                 adev->gfx.config.max_hw_contexts = 8;
1760
1761                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1762                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1763                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1764                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1765                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1766                 break;
1767         case CHIP_TONGA:
1768                 adev->gfx.config.max_shader_engines = 4;
1769                 adev->gfx.config.max_tile_pipes = 8;
1770                 adev->gfx.config.max_cu_per_sh = 8;
1771                 adev->gfx.config.max_sh_per_se = 1;
1772                 adev->gfx.config.max_backends_per_se = 2;
1773                 adev->gfx.config.max_texture_channel_caches = 8;
1774                 adev->gfx.config.max_gprs = 256;
1775                 adev->gfx.config.max_gs_threads = 32;
1776                 adev->gfx.config.max_hw_contexts = 8;
1777
1778                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1779                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1780                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1781                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1782                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1783                 break;
1784         case CHIP_CARRIZO:
1785                 adev->gfx.config.max_shader_engines = 1;
1786                 adev->gfx.config.max_tile_pipes = 2;
1787                 adev->gfx.config.max_sh_per_se = 1;
1788                 adev->gfx.config.max_backends_per_se = 2;
1789                 adev->gfx.config.max_cu_per_sh = 8;
1790                 adev->gfx.config.max_texture_channel_caches = 2;
1791                 adev->gfx.config.max_gprs = 256;
1792                 adev->gfx.config.max_gs_threads = 32;
1793                 adev->gfx.config.max_hw_contexts = 8;
1794
1795                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1796                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1797                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1798                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1799                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1800                 break;
1801         case CHIP_STONEY:
1802                 adev->gfx.config.max_shader_engines = 1;
1803                 adev->gfx.config.max_tile_pipes = 2;
1804                 adev->gfx.config.max_sh_per_se = 1;
1805                 adev->gfx.config.max_backends_per_se = 1;
1806                 adev->gfx.config.max_cu_per_sh = 3;
1807                 adev->gfx.config.max_texture_channel_caches = 2;
1808                 adev->gfx.config.max_gprs = 256;
1809                 adev->gfx.config.max_gs_threads = 16;
1810                 adev->gfx.config.max_hw_contexts = 8;
1811
1812                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1813                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1814                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1815                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1816                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1817                 break;
1818         default:
1819                 adev->gfx.config.max_shader_engines = 2;
1820                 adev->gfx.config.max_tile_pipes = 4;
1821                 adev->gfx.config.max_cu_per_sh = 2;
1822                 adev->gfx.config.max_sh_per_se = 1;
1823                 adev->gfx.config.max_backends_per_se = 2;
1824                 adev->gfx.config.max_texture_channel_caches = 4;
1825                 adev->gfx.config.max_gprs = 256;
1826                 adev->gfx.config.max_gs_threads = 32;
1827                 adev->gfx.config.max_hw_contexts = 8;
1828
1829                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1830                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1831                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1832                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1833                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1834                 break;
1835         }
1836
1837         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1838         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1839         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1840
1841         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1842         adev->gfx.config.mem_max_burst_length_bytes = 256;
1843         if (adev->flags & AMD_IS_APU) {
1844                 /* Get memory bank mapping mode. */
1845                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1846                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1847                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1848
1849                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1850                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1851                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1852
1853                 /* Validate settings in case only one DIMM installed. */
1854                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1855                         dimm00_addr_map = 0;
1856                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1857                         dimm01_addr_map = 0;
1858                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1859                         dimm10_addr_map = 0;
1860                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1861                         dimm11_addr_map = 0;
1862
1863                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1864                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1865                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1866                         adev->gfx.config.mem_row_size_in_kb = 2;
1867                 else
1868                         adev->gfx.config.mem_row_size_in_kb = 1;
1869         } else {
1870                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1871                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1872                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1873                         adev->gfx.config.mem_row_size_in_kb = 4;
1874         }
1875
1876         adev->gfx.config.shader_engine_tile_size = 32;
1877         adev->gfx.config.num_gpus = 1;
1878         adev->gfx.config.multi_gpu_tile_size = 64;
1879
1880         /* fix up row size */
1881         switch (adev->gfx.config.mem_row_size_in_kb) {
1882         case 1:
1883         default:
1884                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1885                 break;
1886         case 2:
1887                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1888                 break;
1889         case 4:
1890                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1891                 break;
1892         }
1893         adev->gfx.config.gb_addr_config = gb_addr_config;
1894
1895         return 0;
1896 }
1897
1898 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1899                                         int mec, int pipe, int queue)
1900 {
1901         int r;
1902         unsigned irq_type;
1903         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1904
1905         ring = &adev->gfx.compute_ring[ring_id];
1906
1907         /* mec0 is me1 */
1908         ring->me = mec + 1;
1909         ring->pipe = pipe;
1910         ring->queue = queue;
1911
1912         ring->ring_obj = NULL;
1913         ring->use_doorbell = true;
1914         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1915         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1916                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1917         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1918
1919         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1920                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1921                 + ring->pipe;
1922
1923         /* type-2 packets are deprecated on MEC, use type-3 instead */
1924         r = amdgpu_ring_init(adev, ring, 1024,
1925                         &adev->gfx.eop_irq, irq_type);
1926         if (r)
1927                 return r;
1928
1929
1930         return 0;
1931 }
1932
/**
 * gfx_v8_0_sw_init - software-side init for the GFX v8 IP block
 * @handle: amdgpu_device pointer (passed as void * by the IP-block framework)
 *
 * Registers the GFX interrupt sources, loads the gfx microcode, allocates
 * the RLC/MEC/KIQ buffer objects, creates the gfx, compute and KIQ rings
 * and reserves the GDS/GWS/OA partitions for gfx use.
 *
 * Return: 0 on success, negative error code on failure.
 */
static int gfx_v8_0_sw_init(void *handle)
{
        int i, j, k, r, ring_id;
        struct amdgpu_ring *ring;
        struct amdgpu_kiq *kiq;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* per-ASIC micro-engine-compute (MEC) count: the larger VI parts
         * get two MECs, Topaz/Stoney (and anything unknown) get one */
        switch (adev->asic_type) {
        case CHIP_FIJI:
        case CHIP_TONGA:
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
        case CHIP_POLARIS10:
        case CHIP_CARRIZO:
                adev->gfx.mec.num_mec = 2;
                break;
        case CHIP_TOPAZ:
        case CHIP_STONEY:
        default:
                adev->gfx.mec.num_mec = 1;
                break;
        }

        /* fixed topology on gfx v8: 4 pipes per MEC, 8 queues per pipe */
        adev->gfx.mec.num_pipe_per_mec = 4;
        adev->gfx.mec.num_queue_per_pipe = 8;

        /* KIQ event (legacy VI interrupt source id 178) */
        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
        if (r)
                return r;

        /* EOP Event (legacy VI interrupt source id 181) */
        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
        if (r)
                return r;

        /* Privileged reg (legacy VI interrupt source id 184) */
        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
                              &adev->gfx.priv_reg_irq);
        if (r)
                return r;

        /* Privileged inst (legacy VI interrupt source id 185) */
        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
                              &adev->gfx.priv_inst_irq);
        if (r)
                return r;

        adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

        gfx_v8_0_scratch_init(adev);

        /* microcode must be in place before RLC/MEC buffer-object setup */
        r = gfx_v8_0_init_microcode(adev);
        if (r) {
                DRM_ERROR("Failed to load gfx firmware!\n");
                return r;
        }

        r = gfx_v8_0_rlc_init(adev);
        if (r) {
                DRM_ERROR("Failed to init rlc BOs!\n");
                return r;
        }

        r = gfx_v8_0_mec_init(adev);
        if (r) {
                DRM_ERROR("Failed to init MEC BOs!\n");
                return r;
        }

        /* set up the gfx ring */
        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                ring = &adev->gfx.gfx_ring[i];
                ring->ring_obj = NULL;
                /* NOTE(review): every gfx ring gets the same name/doorbell
                 * index here — presumably num_gfx_rings is 1 on gfx v8;
                 * confirm before raising it */
                sprintf(ring->name, "gfx");
                /* no gfx doorbells on iceland */
                if (adev->asic_type != CHIP_TOPAZ) {
                        ring->use_doorbell = true;
                        ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
                }

                r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
                                     AMDGPU_CP_IRQ_GFX_EOP);
                if (r)
                        return r;
        }


        /* set up the compute queues - allocate horizontally across pipes:
         * the queue index is the middle loop so that successive ring_ids
         * land on different pipes, skipping queues the scheduler policy
         * (amdgpu_gfx_is_mec_queue_enabled) has not enabled */
        ring_id = 0;
        for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
                for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
                        for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
                                if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
                                        continue;

                                r = gfx_v8_0_compute_ring_init(adev,
                                                                ring_id,
                                                                i, k, j);
                                if (r)
                                        return r;

                                ring_id++;
                        }
                }
        }

        r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
        if (r) {
                DRM_ERROR("Failed to init KIQ BOs!\n");
                return r;
        }

        kiq = &adev->gfx.kiq;
        r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
        if (r)
                return r;

        /* create MQD for all compute queues as well as KIQ for SRIOV case */
        r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
        if (r)
                return r;

        /* reserve GDS, GWS and OA resource for gfx */
        r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
                                    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
                                    &adev->gds.gds_gfx_bo, NULL, NULL);
        if (r)
                return r;

        r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
                                    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
                                    &adev->gds.gws_gfx_bo, NULL, NULL);
        if (r)
                return r;

        r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
                                    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
                                    &adev->gds.oa_gfx_bo, NULL, NULL);
        if (r)
                return r;

        /* constant engine (CE) RAM size on gfx v8 */
        adev->gfx.ce_ram_size = 0x8000;

        r = gfx_v8_0_gpu_early_init(adev);
        if (r)
                return r;

        return 0;
}
2083
2084 static int gfx_v8_0_sw_fini(void *handle)
2085 {
2086         int i;
2087         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2088
2089         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2090         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2091         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2092
2093         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2094                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2095         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2096                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2097
2098         amdgpu_gfx_compute_mqd_sw_fini(adev);
2099         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2100         amdgpu_gfx_kiq_fini(adev);
2101
2102         gfx_v8_0_mec_fini(adev);
2103         gfx_v8_0_rlc_fini(adev);
2104         gfx_v8_0_free_microcode(adev);
2105
2106         return 0;
2107 }
2108
2109 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2110 {
2111         uint32_t *modearray, *mod2array;
2112         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2113         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2114         u32 reg_offset;
2115
2116         modearray = adev->gfx.config.tile_mode_array;
2117         mod2array = adev->gfx.config.macrotile_mode_array;
2118
2119         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2120                 modearray[reg_offset] = 0;
2121
2122         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2123                 mod2array[reg_offset] = 0;
2124
2125         switch (adev->asic_type) {
2126         case CHIP_TOPAZ:
2127                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2128                                 PIPE_CONFIG(ADDR_SURF_P2) |
2129                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2130                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2131                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2132                                 PIPE_CONFIG(ADDR_SURF_P2) |
2133                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2134                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2135                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2136                                 PIPE_CONFIG(ADDR_SURF_P2) |
2137                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2138                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2139                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2140                                 PIPE_CONFIG(ADDR_SURF_P2) |
2141                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2142                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2143                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2144                                 PIPE_CONFIG(ADDR_SURF_P2) |
2145                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2146                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2147                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2148                                 PIPE_CONFIG(ADDR_SURF_P2) |
2149                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2150                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2151                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2152                                 PIPE_CONFIG(ADDR_SURF_P2) |
2153                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2154                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2155                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2156                                 PIPE_CONFIG(ADDR_SURF_P2));
2157                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2158                                 PIPE_CONFIG(ADDR_SURF_P2) |
2159                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2160                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2161                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2162                                  PIPE_CONFIG(ADDR_SURF_P2) |
2163                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2164                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2165                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2166                                  PIPE_CONFIG(ADDR_SURF_P2) |
2167                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2168                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2169                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2170                                  PIPE_CONFIG(ADDR_SURF_P2) |
2171                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2172                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2173                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2174                                  PIPE_CONFIG(ADDR_SURF_P2) |
2175                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2176                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2177                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2178                                  PIPE_CONFIG(ADDR_SURF_P2) |
2179                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2180                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2181                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2182                                  PIPE_CONFIG(ADDR_SURF_P2) |
2183                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2184                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2185                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2186                                  PIPE_CONFIG(ADDR_SURF_P2) |
2187                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2188                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2189                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2190                                  PIPE_CONFIG(ADDR_SURF_P2) |
2191                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2192                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2193                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2194                                  PIPE_CONFIG(ADDR_SURF_P2) |
2195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2197                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2198                                  PIPE_CONFIG(ADDR_SURF_P2) |
2199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2201                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2202                                  PIPE_CONFIG(ADDR_SURF_P2) |
2203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2205                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2206                                  PIPE_CONFIG(ADDR_SURF_P2) |
2207                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2208                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2209                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2210                                  PIPE_CONFIG(ADDR_SURF_P2) |
2211                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2212                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2213                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2214                                  PIPE_CONFIG(ADDR_SURF_P2) |
2215                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2216                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2217                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2218                                  PIPE_CONFIG(ADDR_SURF_P2) |
2219                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2220                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2221                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2222                                  PIPE_CONFIG(ADDR_SURF_P2) |
2223                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2224                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2225                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2226                                  PIPE_CONFIG(ADDR_SURF_P2) |
2227                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2228                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2229
2230                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2231                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2232                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2233                                 NUM_BANKS(ADDR_SURF_8_BANK));
2234                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2235                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2236                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2237                                 NUM_BANKS(ADDR_SURF_8_BANK));
2238                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2239                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2240                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2241                                 NUM_BANKS(ADDR_SURF_8_BANK));
2242                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2243                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2244                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2245                                 NUM_BANKS(ADDR_SURF_8_BANK));
2246                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2247                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2248                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2249                                 NUM_BANKS(ADDR_SURF_8_BANK));
2250                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2251                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2252                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2253                                 NUM_BANKS(ADDR_SURF_8_BANK));
2254                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2256                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257                                 NUM_BANKS(ADDR_SURF_8_BANK));
2258                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2259                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2260                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2261                                 NUM_BANKS(ADDR_SURF_16_BANK));
2262                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2263                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2264                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2265                                 NUM_BANKS(ADDR_SURF_16_BANK));
2266                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2267                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2268                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2269                                  NUM_BANKS(ADDR_SURF_16_BANK));
2270                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2271                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2272                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2273                                  NUM_BANKS(ADDR_SURF_16_BANK));
2274                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2276                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2277                                  NUM_BANKS(ADDR_SURF_16_BANK));
2278                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2280                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2281                                  NUM_BANKS(ADDR_SURF_16_BANK));
2282                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2283                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2284                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2285                                  NUM_BANKS(ADDR_SURF_8_BANK));
2286
2287                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2288                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2289                             reg_offset != 23)
2290                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2291
2292                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2293                         if (reg_offset != 7)
2294                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2295
2296                 break;
2297         case CHIP_FIJI:
2298                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2301                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2302                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2305                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2306                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2309                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2311                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2313                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2317                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2319                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2321                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2325                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2326                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2327                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2328                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2329                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2331                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2332                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2333                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2335                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2336                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2340                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2341                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2344                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2345                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2348                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2349                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2352                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2353                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2356                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2357                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2360                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2361                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2364                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2365                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2368                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2369                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2372                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2373                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2376                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2377                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2381                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2385                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2387                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2389                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2390                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2393                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2395                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2397                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2401                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2403                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2407                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2408                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2411                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2413                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2415                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2416                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2417                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2418                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2419                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2420
2421                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2423                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2424                                 NUM_BANKS(ADDR_SURF_8_BANK));
2425                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2427                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2428                                 NUM_BANKS(ADDR_SURF_8_BANK));
2429                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2431                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2432                                 NUM_BANKS(ADDR_SURF_8_BANK));
2433                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2435                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2436                                 NUM_BANKS(ADDR_SURF_8_BANK));
2437                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2439                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2440                                 NUM_BANKS(ADDR_SURF_8_BANK));
2441                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2443                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2444                                 NUM_BANKS(ADDR_SURF_8_BANK));
2445                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2447                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2448                                 NUM_BANKS(ADDR_SURF_8_BANK));
2449                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2451                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                                 NUM_BANKS(ADDR_SURF_8_BANK));
2453                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2455                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456                                 NUM_BANKS(ADDR_SURF_8_BANK));
2457                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2459                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460                                  NUM_BANKS(ADDR_SURF_8_BANK));
2461                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464                                  NUM_BANKS(ADDR_SURF_8_BANK));
2465                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2468                                  NUM_BANKS(ADDR_SURF_8_BANK));
2469                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2472                                  NUM_BANKS(ADDR_SURF_8_BANK));
2473                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476                                  NUM_BANKS(ADDR_SURF_4_BANK));
2477
2478                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2479                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2480
2481                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2482                         if (reg_offset != 7)
2483                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2484
2485                 break;
2486         case CHIP_TONGA:
2487                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2488                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2490                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2491                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2492                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2494                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2495                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2498                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2502                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2506                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2508                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2510                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2512                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2514                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2515                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2516                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2517                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2518                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2520                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2521                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2522                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2524                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2525                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2528                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2529                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2530                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2532                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2533                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2534                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2535                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2536                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2537                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2538                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2540                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2541                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2544                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2545                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2546                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2548                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2550                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2552                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2553                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2555                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2556                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2557                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2558                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2560                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2561                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2562                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2564                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2565                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2566                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2568                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2570                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2574                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2576                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2578                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2579                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2582                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2584                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2586                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2588                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2589                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2590                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2592                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2593                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2594                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2597                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2601                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2602                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2605                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2606                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2607                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2608                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2609
2610                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2612                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2613                                 NUM_BANKS(ADDR_SURF_16_BANK));
2614                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2616                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2617                                 NUM_BANKS(ADDR_SURF_16_BANK));
2618                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2620                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2621                                 NUM_BANKS(ADDR_SURF_16_BANK));
2622                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2624                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2625                                 NUM_BANKS(ADDR_SURF_16_BANK));
2626                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2628                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2629                                 NUM_BANKS(ADDR_SURF_16_BANK));
2630                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2632                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2633                                 NUM_BANKS(ADDR_SURF_16_BANK));
2634                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2636                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2637                                 NUM_BANKS(ADDR_SURF_16_BANK));
2638                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2640                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2641                                 NUM_BANKS(ADDR_SURF_16_BANK));
2642                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2644                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2645                                 NUM_BANKS(ADDR_SURF_16_BANK));
2646                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2648                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2649                                  NUM_BANKS(ADDR_SURF_16_BANK));
2650                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2653                                  NUM_BANKS(ADDR_SURF_16_BANK));
2654                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2657                                  NUM_BANKS(ADDR_SURF_8_BANK));
2658                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2660                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2661                                  NUM_BANKS(ADDR_SURF_4_BANK));
2662                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2664                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2665                                  NUM_BANKS(ADDR_SURF_4_BANK));
2666
2667                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2668                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2669
2670                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2671                         if (reg_offset != 7)
2672                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2673
2674                 break;
2675         case CHIP_POLARIS11:
2676         case CHIP_POLARIS12:
2677                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2680                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2681                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2684                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2685                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2688                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2692                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2696                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2698                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2700                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2702                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2704                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2705                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2706                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2708                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2710                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2711                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2712                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2720                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2722                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2723                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2726                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2727                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2730                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2736                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2739                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2743                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2747                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2751                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2755                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2776                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2778                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2780                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2782                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2784                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2786                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2787                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2790                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2791                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2792                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2794                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2795                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2796                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2798                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2799
2800                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2801                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2802                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2803                                 NUM_BANKS(ADDR_SURF_16_BANK));
2804
2805                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2806                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2807                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2808                                 NUM_BANKS(ADDR_SURF_16_BANK));
2809
2810                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2812                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2813                                 NUM_BANKS(ADDR_SURF_16_BANK));
2814
2815                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2816                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2817                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2818                                 NUM_BANKS(ADDR_SURF_16_BANK));
2819
2820                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2823                                 NUM_BANKS(ADDR_SURF_16_BANK));
2824
2825                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828                                 NUM_BANKS(ADDR_SURF_16_BANK));
2829
2830                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2832                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2833                                 NUM_BANKS(ADDR_SURF_16_BANK));
2834
2835                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2836                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2837                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2838                                 NUM_BANKS(ADDR_SURF_16_BANK));
2839
2840                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2841                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2842                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2843                                 NUM_BANKS(ADDR_SURF_16_BANK));
2844
2845                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2846                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2847                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848                                 NUM_BANKS(ADDR_SURF_16_BANK));
2849
2850                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2852                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2853                                 NUM_BANKS(ADDR_SURF_16_BANK));
2854
2855                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2856                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2857                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2858                                 NUM_BANKS(ADDR_SURF_16_BANK));
2859
2860                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2862                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2863                                 NUM_BANKS(ADDR_SURF_8_BANK));
2864
2865                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2868                                 NUM_BANKS(ADDR_SURF_4_BANK));
2869
2870                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2871                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2872
2873                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2874                         if (reg_offset != 7)
2875                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2876
2877                 break;
2878         case CHIP_POLARIS10:
2879                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2882                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2883                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2886                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2887                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2900                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2904                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2908                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2909                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2912                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2913                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2914                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2920                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2922                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2924                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2925                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2926                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2928                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2929                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2932                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2934                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2936                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2937                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2938                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2941                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2942                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2945                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2946                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2949                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2953                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2954                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2957                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2962                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2970                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2974                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2978                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2980                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2982                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2984                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2986                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2988                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2989                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2990                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2992                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2993                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2994                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2997                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2998                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2999                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3000                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3001
3002                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3003                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3004                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3005                                 NUM_BANKS(ADDR_SURF_16_BANK));
3006
3007                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3008                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010                                 NUM_BANKS(ADDR_SURF_16_BANK));
3011
3012                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3014                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3015                                 NUM_BANKS(ADDR_SURF_16_BANK));
3016
3017                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3018                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3019                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3020                                 NUM_BANKS(ADDR_SURF_16_BANK));
3021
3022                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3023                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3024                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3025                                 NUM_BANKS(ADDR_SURF_16_BANK));
3026
3027                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3029                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3030                                 NUM_BANKS(ADDR_SURF_16_BANK));
3031
3032                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3033                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3034                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3035                                 NUM_BANKS(ADDR_SURF_16_BANK));
3036
3037                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3038                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3039                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3040                                 NUM_BANKS(ADDR_SURF_16_BANK));
3041
3042                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3043                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3044                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3045                                 NUM_BANKS(ADDR_SURF_16_BANK));
3046
3047                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3048                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3049                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3050                                 NUM_BANKS(ADDR_SURF_16_BANK));
3051
3052                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3053                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3054                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3055                                 NUM_BANKS(ADDR_SURF_16_BANK));
3056
3057                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3058                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3059                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3060                                 NUM_BANKS(ADDR_SURF_8_BANK));
3061
3062                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3063                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3064                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3065                                 NUM_BANKS(ADDR_SURF_4_BANK));
3066
3067                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3068                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3069                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3070                                 NUM_BANKS(ADDR_SURF_4_BANK));
3071
3072                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3073                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3074
3075                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3076                         if (reg_offset != 7)
3077                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3078
3079                 break;
3080         case CHIP_STONEY:
3081                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3082                                 PIPE_CONFIG(ADDR_SURF_P2) |
3083                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3084                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3085                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3086                                 PIPE_CONFIG(ADDR_SURF_P2) |
3087                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3088                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3089                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090                                 PIPE_CONFIG(ADDR_SURF_P2) |
3091                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3092                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3094                                 PIPE_CONFIG(ADDR_SURF_P2) |
3095                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3096                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3098                                 PIPE_CONFIG(ADDR_SURF_P2) |
3099                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3100                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3102                                 PIPE_CONFIG(ADDR_SURF_P2) |
3103                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3104                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3105                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3106                                 PIPE_CONFIG(ADDR_SURF_P2) |
3107                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3108                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3109                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3110                                 PIPE_CONFIG(ADDR_SURF_P2));
3111                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3112                                 PIPE_CONFIG(ADDR_SURF_P2) |
3113                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3114                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3115                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116                                  PIPE_CONFIG(ADDR_SURF_P2) |
3117                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3118                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3119                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3120                                  PIPE_CONFIG(ADDR_SURF_P2) |
3121                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3122                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3123                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3124                                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3126                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3128                                  PIPE_CONFIG(ADDR_SURF_P2) |
3129                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3130                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3131                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3132                                  PIPE_CONFIG(ADDR_SURF_P2) |
3133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3135                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3136                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3138                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3139                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3140                                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3142                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3143                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3147                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3167                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3168                                  PIPE_CONFIG(ADDR_SURF_P2) |
3169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3172                                  PIPE_CONFIG(ADDR_SURF_P2) |
3173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3175                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176                                  PIPE_CONFIG(ADDR_SURF_P2) |
3177                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3178                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3179                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3180                                  PIPE_CONFIG(ADDR_SURF_P2) |
3181                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3182                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3183
3184                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3186                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3187                                 NUM_BANKS(ADDR_SURF_8_BANK));
3188                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3189                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3190                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3191                                 NUM_BANKS(ADDR_SURF_8_BANK));
3192                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3194                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3195                                 NUM_BANKS(ADDR_SURF_8_BANK));
3196                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3198                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3199                                 NUM_BANKS(ADDR_SURF_8_BANK));
3200                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203                                 NUM_BANKS(ADDR_SURF_8_BANK));
3204                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3206                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3207                                 NUM_BANKS(ADDR_SURF_8_BANK));
3208                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3210                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3211                                 NUM_BANKS(ADDR_SURF_8_BANK));
3212                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3213                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3214                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215                                 NUM_BANKS(ADDR_SURF_16_BANK));
3216                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3217                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3218                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3219                                 NUM_BANKS(ADDR_SURF_16_BANK));
3220                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3221                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3222                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223                                  NUM_BANKS(ADDR_SURF_16_BANK));
3224                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3225                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3226                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227                                  NUM_BANKS(ADDR_SURF_16_BANK));
3228                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3230                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3231                                  NUM_BANKS(ADDR_SURF_16_BANK));
3232                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3233                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3234                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235                                  NUM_BANKS(ADDR_SURF_16_BANK));
3236                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3238                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3239                                  NUM_BANKS(ADDR_SURF_8_BANK));
3240
3241                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3242                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3243                             reg_offset != 23)
3244                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3245
3246                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3247                         if (reg_offset != 7)
3248                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3249
3250                 break;
3251         default:
3252                 dev_warn(adev->dev,
3253                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3254                          adev->asic_type);
3255
3256         case CHIP_CARRIZO:
3257                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3258                                 PIPE_CONFIG(ADDR_SURF_P2) |
3259                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3260                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3261                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3262                                 PIPE_CONFIG(ADDR_SURF_P2) |
3263                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3264                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3265                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3266                                 PIPE_CONFIG(ADDR_SURF_P2) |
3267                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3269                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3270                                 PIPE_CONFIG(ADDR_SURF_P2) |
3271                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3272                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3273                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3274                                 PIPE_CONFIG(ADDR_SURF_P2) |
3275                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3276                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3277                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3278                                 PIPE_CONFIG(ADDR_SURF_P2) |
3279                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3280                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3281                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3282                                 PIPE_CONFIG(ADDR_SURF_P2) |
3283                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3284                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3285                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3286                                 PIPE_CONFIG(ADDR_SURF_P2));
3287                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3288                                 PIPE_CONFIG(ADDR_SURF_P2) |
3289                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3290                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3291                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292                                  PIPE_CONFIG(ADDR_SURF_P2) |
3293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3295                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3296                                  PIPE_CONFIG(ADDR_SURF_P2) |
3297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3299                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3300                                  PIPE_CONFIG(ADDR_SURF_P2) |
3301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3303                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3304                                  PIPE_CONFIG(ADDR_SURF_P2) |
3305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3307                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3308                                  PIPE_CONFIG(ADDR_SURF_P2) |
3309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3311                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3312                                  PIPE_CONFIG(ADDR_SURF_P2) |
3313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3315                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3316                                  PIPE_CONFIG(ADDR_SURF_P2) |
3317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3319                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3320                                  PIPE_CONFIG(ADDR_SURF_P2) |
3321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3323                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3324                                  PIPE_CONFIG(ADDR_SURF_P2) |
3325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3327                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3328                                  PIPE_CONFIG(ADDR_SURF_P2) |
3329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3331                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3332                                  PIPE_CONFIG(ADDR_SURF_P2) |
3333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3335                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3336                                  PIPE_CONFIG(ADDR_SURF_P2) |
3337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3339                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3340                                  PIPE_CONFIG(ADDR_SURF_P2) |
3341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3343                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3344                                  PIPE_CONFIG(ADDR_SURF_P2) |
3345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3348                                  PIPE_CONFIG(ADDR_SURF_P2) |
3349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3351                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3352                                  PIPE_CONFIG(ADDR_SURF_P2) |
3353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3355                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3356                                  PIPE_CONFIG(ADDR_SURF_P2) |
3357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3359
3360                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3361                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3362                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3363                                 NUM_BANKS(ADDR_SURF_8_BANK));
3364                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3365                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3366                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367                                 NUM_BANKS(ADDR_SURF_8_BANK));
3368                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3370                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3371                                 NUM_BANKS(ADDR_SURF_8_BANK));
3372                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3373                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3374                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3375                                 NUM_BANKS(ADDR_SURF_8_BANK));
3376                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3379                                 NUM_BANKS(ADDR_SURF_8_BANK));
3380                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3381                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3382                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3383                                 NUM_BANKS(ADDR_SURF_8_BANK));
3384                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3386                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3387                                 NUM_BANKS(ADDR_SURF_8_BANK));
3388                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3389                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3390                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3391                                 NUM_BANKS(ADDR_SURF_16_BANK));
3392                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3393                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3394                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3395                                 NUM_BANKS(ADDR_SURF_16_BANK));
3396                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3397                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3398                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3399                                  NUM_BANKS(ADDR_SURF_16_BANK));
3400                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3401                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3402                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3403                                  NUM_BANKS(ADDR_SURF_16_BANK));
3404                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3405                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3406                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3407                                  NUM_BANKS(ADDR_SURF_16_BANK));
3408                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3409                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3410                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3411                                  NUM_BANKS(ADDR_SURF_16_BANK));
3412                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3413                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3414                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3415                                  NUM_BANKS(ADDR_SURF_8_BANK));
3416
3417                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3418                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3419                             reg_offset != 23)
3420                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3421
3422                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3423                         if (reg_offset != 7)
3424                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3425
3426                 break;
3427         }
3428 }
3429
3430 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3431                                   u32 se_num, u32 sh_num, u32 instance)
3432 {
3433         u32 data;
3434
3435         if (instance == 0xffffffff)
3436                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3437         else
3438                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3439
3440         if (se_num == 0xffffffff)
3441                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3442         else
3443                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3444
3445         if (sh_num == 0xffffffff)
3446                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3447         else
3448                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3449
3450         WREG32(mmGRBM_GFX_INDEX, data);
3451 }
3452
3453 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3454 {
3455         u32 data, mask;
3456
3457         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3458                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3459
3460         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3461
3462         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3463                                          adev->gfx.config.max_sh_per_se);
3464
3465         return (~data) & mask;
3466 }
3467
3468 static void
3469 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3470 {
3471         switch (adev->asic_type) {
3472         case CHIP_FIJI:
3473                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3474                           RB_XSEL2(1) | PKR_MAP(2) |
3475                           PKR_XSEL(1) | PKR_YSEL(1) |
3476                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3477                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3478                            SE_PAIR_YSEL(2);
3479                 break;
3480         case CHIP_TONGA:
3481         case CHIP_POLARIS10:
3482                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3483                           SE_XSEL(1) | SE_YSEL(1);
3484                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3485                            SE_PAIR_YSEL(2);
3486                 break;
3487         case CHIP_TOPAZ:
3488         case CHIP_CARRIZO:
3489                 *rconf |= RB_MAP_PKR0(2);
3490                 *rconf1 |= 0x0;
3491                 break;
3492         case CHIP_POLARIS11:
3493         case CHIP_POLARIS12:
3494                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3495                           SE_XSEL(1) | SE_YSEL(1);
3496                 *rconf1 |= 0x0;
3497                 break;
3498         case CHIP_STONEY:
3499                 *rconf |= 0x0;
3500                 *rconf1 |= 0x0;
3501                 break;
3502         default:
3503                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3504                 break;
3505         }
3506 }
3507
/*
 * gfx_v8_0_write_harvested_raster_configs - raster setup for harvested parts
 *
 * For chips where some render backends are fused off, rewrites the
 * RB/PKR/SE mapping fields of PA_SC_RASTER_CONFIG(_1) per shader engine so
 * traffic is steered away from the dead backends, then programs the result
 * for each SE individually.
 *
 * @raster_config/@raster_config_1: config values for a fully populated chip
 * @rb_mask: bitmap of RBs that are actually active
 * @num_rb: RB count a fully enabled chip would have
 *
 * Called with grbm_idx_mutex held (see gfx_v8_0_setup_rb).
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice rb_mask into one sub-mask per shader engine (up to 4 SEs). */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If one SE pair lost all of its RBs, steer the SE-pair mapping at
	 * the surviving pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;	/* first SE of this SE's pair */

		/* If either SE of the pair has no RBs left, remap SE_MAP to
		 * the one that does. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		/* Remap the packer selection when one packer lost all RBs. */
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* Packer 0's pair of RBs: remap if one is missing. */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* Same treatment for packer 1's RB pair. */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3616
/*
 * gfx_v8_0_setup_rb - discover and program the active render backends
 *
 * Builds a bitmap of live RBs from the per-SE/SH disable fuses, programs
 * PA_SC_RASTER_CONFIG(_1) (taking the harvested path when some RBs are
 * fused off), and caches the resulting per-SE/SH raster state so it can
 * be reported to userspace.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Gather the active-RB bitmap across every SE/SH. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* Fully populated chip (or no enable info): broadcast the default
	 * config; otherwise remap around the harvested RBs per SE. */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3673
/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs' memory apertures
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* Same base value in both 16-bit halves of SH_MEM_BASES -
	 * NOTE(review): presumably shared + private base; confirm field
	 * layout against the SH_MEM_BASES register spec. */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, cache-coherent
	 * default memory type, private apertures translated by the ATC. */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	/* Program each compute VMID (8..15) via the SRBM index. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base (1) > limit (0) - NOTE(review): presumably
		 * leaves the APE1 aperture disabled; confirm. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* Restore SRBM broadcast/default selection. */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3718
3719 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3720 {
3721         switch (adev->asic_type) {
3722         default:
3723                 adev->gfx.config.double_offchip_lds_buf = 1;
3724                 break;
3725         case CHIP_CARRIZO:
3726         case CHIP_STONEY:
3727                 adev->gfx.config.double_offchip_lds_buf = 0;
3728                 break;
3729         }
3730 }
3731
/*
 * gfx_v8_0_gpu_init - one-time GFX block bring-up
 *
 * Programs the global address config, tiling tables, render backends and
 * CU info, then sets up the per-VMID SH_MEM_* apertures and a few
 * broadcast registers (PA_SC FIFO sizes, SPI arbitration).
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	/* Cap the GRBM register-read timeout and mirror GB_ADDR_CONFIG
	 * into the other blocks that must agree with it. */
	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	/* Program SH_MEM_* for every VMID managed by the GFX hub. */
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached default MTYPE, base 0 -
			 * NOTE(review): presumably the kernel/CP context;
			 * confirm. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* Other VMIDs: noncoherent default MTYPE and the
			 * shared aperture's top 16 address bits as base. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* APE1 base (1) > limit (0) - NOTE(review): presumably
		 * leaves the APE1 aperture disabled; confirm. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	/* Scan-converter FIFO sizing from the per-ASIC config. */
	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* Set all four SPI pipe-order timestamp fields to the same value. */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3813
3814 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3815 {
3816         u32 i, j, k;
3817         u32 mask;
3818
3819         mutex_lock(&adev->grbm_idx_mutex);
3820         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3821                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3822                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3823                         for (k = 0; k < adev->usec_timeout; k++) {
3824                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3825                                         break;
3826                                 udelay(1);
3827                         }
3828                 }
3829         }
3830         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3831         mutex_unlock(&adev->grbm_idx_mutex);
3832
3833         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3834                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3835                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3836                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3837         for (k = 0; k < adev->usec_timeout; k++) {
3838                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3839                         break;
3840                 udelay(1);
3841         }
3842 }
3843
3844 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3845                                                bool enable)
3846 {
3847         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3848
3849         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3850         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3851         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3852         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3853
3854         WREG32(mmCP_INT_CNTL_RING0, tmp);
3855 }
3856
3857 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3858 {
3859         /* csib */
3860         WREG32(mmRLC_CSIB_ADDR_HI,
3861                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3862         WREG32(mmRLC_CSIB_ADDR_LO,
3863                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3864         WREG32(mmRLC_CSIB_LENGTH,
3865                         adev->gfx.rlc.clear_state_size);
3866 }
3867
3868 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3869                                 int ind_offset,
3870                                 int list_size,
3871                                 int *unique_indices,
3872                                 int *indices_count,
3873                                 int max_indices,
3874                                 int *ind_start_offsets,
3875                                 int *offset_count,
3876                                 int max_offset)
3877 {
3878         int indices;
3879         bool new_entry = true;
3880
3881         for (; ind_offset < list_size; ind_offset++) {
3882
3883                 if (new_entry) {
3884                         new_entry = false;
3885                         ind_start_offsets[*offset_count] = ind_offset;
3886                         *offset_count = *offset_count + 1;
3887                         BUG_ON(*offset_count >= max_offset);
3888                 }
3889
3890                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3891                         new_entry = true;
3892                         continue;
3893                 }
3894
3895                 ind_offset += 2;
3896
3897                 /* look for the matching indice */
3898                 for (indices = 0;
3899                         indices < *indices_count;
3900                         indices++) {
3901                         if (unique_indices[indices] ==
3902                                 register_list_format[ind_offset])
3903                                 break;
3904                 }
3905
3906                 if (indices >= *indices_count) {
3907                         unique_indices[*indices_count] =
3908                                 register_list_format[ind_offset];
3909                         indices = *indices_count;
3910                         *indices_count = *indices_count + 1;
3911                         BUG_ON(*indices_count >= max_indices);
3912                 }
3913
3914                 register_list_format[ind_offset] = indices;
3915         }
3916 }
3917
3918 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3919 {
3920         int i, temp, data;
3921         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3922         int indices_count = 0;
3923         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3924         int offset_count = 0;
3925
3926         int list_size;
3927         unsigned int *register_list_format =
3928                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3929         if (!register_list_format)
3930                 return -ENOMEM;
3931         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3932                         adev->gfx.rlc.reg_list_format_size_bytes);
3933
3934         gfx_v8_0_parse_ind_reg_list(register_list_format,
3935                                 RLC_FormatDirectRegListLength,
3936                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3937                                 unique_indices,
3938                                 &indices_count,
3939                                 sizeof(unique_indices) / sizeof(int),
3940                                 indirect_start_offsets,
3941                                 &offset_count,
3942                                 sizeof(indirect_start_offsets)/sizeof(int));
3943
3944         /* save and restore list */
3945         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3946
3947         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3948         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3949                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3950
3951         /* indirect list */
3952         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3953         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3954                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3955
3956         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3957         list_size = list_size >> 1;
3958         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3959         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3960
3961         /* starting offsets starts */
3962         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3963                 adev->gfx.rlc.starting_offsets_start);
3964         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3965                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3966                                 indirect_start_offsets[i]);
3967
3968         /* unique indices */
3969         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3970         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3971         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3972                 if (unique_indices[i] != 0) {
3973                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3974                         WREG32(data + i, unique_indices[i] >> 20);
3975                 }
3976         }
3977         kfree(register_list_format);
3978
3979         return 0;
3980 }
3981
/* Turn on the RLC save/restore machine; callers upload the save/restore
 * lists first (see gfx_v8_0_init_pg). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3986
/* Program the RLC power-gating delay and threshold knobs. */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	/* Idle poll count for the CP ring-buffer write-pointer polling. */
	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* Delays around power-up/power-down transitions - NOTE(review):
	 * units (RLC clocks?) not visible here; confirm against the
	 * RLC_PG_DELAY register spec. */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	/* GFX idle time before the RLC auto-saves GRBM registers. */
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4003
/* Enable/disable SMU clock slowdown while the block is powering up. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4009
/* Enable/disable SMU clock slowdown while the block is powering down. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4015
/* Enable/disable CP power gating.  Note the hardware field has inverted
 * sense (CP_PG_DISABLE), hence enable maps to 0 and vice versa.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4020
4021 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4022 {
4023         if ((adev->asic_type == CHIP_CARRIZO) ||
4024             (adev->asic_type == CHIP_STONEY)) {
4025                 gfx_v8_0_init_csb(adev);
4026                 gfx_v8_0_init_save_restore_list(adev);
4027                 gfx_v8_0_enable_save_restore_machine(adev);
4028                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4029                 gfx_v8_0_init_power_gating(adev);
4030                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4031         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4032                    (adev->asic_type == CHIP_POLARIS12)) {
4033                 gfx_v8_0_init_csb(adev);
4034                 gfx_v8_0_init_save_restore_list(adev);
4035                 gfx_v8_0_enable_save_restore_machine(adev);
4036                 gfx_v8_0_init_power_gating(adev);
4037         }
4038
4039 }
4040
/* Halt the RLC F32 core, mask GUI-idle interrupts, and wait for the
 * RLC serdes to go idle so it is safe to reprogram the RLC.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4048
/* Pulse the RLC soft-reset bit; the 50us delays let the reset settle
 * both while asserted and after release.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4057
/* Re-enable the RLC F32 core after it has been (re)programmed. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* APUs (e.g. carrizo) only enable the cp interrupt after cp init */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4068
4069 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4070 {
4071         const struct rlc_firmware_header_v2_0 *hdr;
4072         const __le32 *fw_data;
4073         unsigned i, fw_size;
4074
4075         if (!adev->gfx.rlc_fw)
4076                 return -EINVAL;
4077
4078         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4079         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4080
4081         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4082                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4083         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4084
4085         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4086         for (i = 0; i < fw_size; i++)
4087                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4088         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4089
4090         return 0;
4091 }
4092
4093 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4094 {
4095         int r;
4096         u32 tmp;
4097
4098         gfx_v8_0_rlc_stop(adev);
4099
4100         /* disable CG */
4101         tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4102         tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4103                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4104         WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4105         if (adev->asic_type == CHIP_POLARIS11 ||
4106             adev->asic_type == CHIP_POLARIS10 ||
4107             adev->asic_type == CHIP_POLARIS12) {
4108                 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4109                 tmp &= ~0x3;
4110                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4111         }
4112
4113         /* disable PG */
4114         WREG32(mmRLC_PG_CNTL, 0);
4115
4116         gfx_v8_0_rlc_reset(adev);
4117         gfx_v8_0_init_pg(adev);
4118
4119         if (!adev->pp_enabled) {
4120                 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4121                         /* legacy rlc firmware loading */
4122                         r = gfx_v8_0_rlc_load_microcode(adev);
4123                         if (r)
4124                                 return r;
4125                 } else {
4126                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4127                                                         AMDGPU_UCODE_ID_RLC_G);
4128                         if (r)
4129                                 return -EINVAL;
4130                 }
4131         }
4132
4133         gfx_v8_0_rlc_start(adev);
4134
4135         return 0;
4136 }
4137
4138 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4139 {
4140         int i;
4141         u32 tmp = RREG32(mmCP_ME_CNTL);
4142
4143         if (enable) {
4144                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4145                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4146                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4147         } else {
4148                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4149                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4150                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4151                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4152                         adev->gfx.gfx_ring[i].ready = false;
4153         }
4154         WREG32(mmCP_ME_CNTL, tmp);
4155         udelay(50);
4156 }
4157
4158 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4159 {
4160         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4161         const struct gfx_firmware_header_v1_0 *ce_hdr;
4162         const struct gfx_firmware_header_v1_0 *me_hdr;
4163         const __le32 *fw_data;
4164         unsigned i, fw_size;
4165
4166         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4167                 return -EINVAL;
4168
4169         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4170                 adev->gfx.pfp_fw->data;
4171         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4172                 adev->gfx.ce_fw->data;
4173         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4174                 adev->gfx.me_fw->data;
4175
4176         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4177         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4178         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4179
4180         gfx_v8_0_cp_gfx_enable(adev, false);
4181
4182         /* PFP */
4183         fw_data = (const __le32 *)
4184                 (adev->gfx.pfp_fw->data +
4185                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4186         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4187         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4188         for (i = 0; i < fw_size; i++)
4189                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4190         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4191
4192         /* CE */
4193         fw_data = (const __le32 *)
4194                 (adev->gfx.ce_fw->data +
4195                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4196         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4197         WREG32(mmCP_CE_UCODE_ADDR, 0);
4198         for (i = 0; i < fw_size; i++)
4199                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4200         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4201
4202         /* ME */
4203         fw_data = (const __le32 *)
4204                 (adev->gfx.me_fw->data +
4205                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4206         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4207         WREG32(mmCP_ME_RAM_WADDR, 0);
4208         for (i = 0; i < fw_size; i++)
4209                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4210         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4211
4212         return 0;
4213 }
4214
4215 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4216 {
4217         u32 count = 0;
4218         const struct cs_section_def *sect = NULL;
4219         const struct cs_extent_def *ext = NULL;
4220
4221         /* begin clear state */
4222         count += 2;
4223         /* context control state */
4224         count += 3;
4225
4226         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4227                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4228                         if (sect->id == SECT_CONTEXT)
4229                                 count += 2 + ext->reg_count;
4230                         else
4231                                 return 0;
4232                 }
4233         }
4234         /* pa_sc_raster_config/pa_sc_raster_config1 */
4235         count += 4;
4236         /* end clear state */
4237         count += 2;
4238         /* clear state */
4239         count += 2;
4240
4241         return count;
4242 }
4243
/* Initialize the gfx CP: program the basic CP config registers, then
 * emit the clear-state preamble, the golden context register values,
 * the per-ASIC raster config and the CE partition setup on the gfx
 * ring.  Returns 0 on success or the ring allocation error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* room for the whole clear-state stream plus the SET_BASE packet */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* replay the golden context register values from vi_cs_data */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC raster configuration values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4336 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4337 {
4338         u32 tmp;
4339         /* no gfx doorbells on iceland */
4340         if (adev->asic_type == CHIP_TOPAZ)
4341                 return;
4342
4343         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4344
4345         if (ring->use_doorbell) {
4346                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4347                                 DOORBELL_OFFSET, ring->doorbell_index);
4348                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4349                                                 DOORBELL_HIT, 0);
4350                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4351                                             DOORBELL_EN, 1);
4352         } else {
4353                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4354         }
4355
4356         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4357
4358         if (adev->flags & AMD_IS_APU)
4359                 return;
4360
4361         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4362                                         DOORBELL_RANGE_LOWER,
4363                                         AMDGPU_DOORBELL_GFX_RING0);
4364         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4365
4366         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4367                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4368 }
4369
/* Bring up gfx ring buffer RB0: program size and cache attributes,
 * reset the read/write pointers, set the writeback addresses, program
 * the ring base and doorbell, then start and self-test the ring.
 * Returns the ring test result (0 on success).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* restore CNTL without RB_RPTR_WR_ENA to lock the pointers in */
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4427
4428 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4429 {
4430         int i;
4431
4432         if (enable) {
4433                 WREG32(mmCP_MEC_CNTL, 0);
4434         } else {
4435                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4436                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4437                         adev->gfx.compute_ring[i].ready = false;
4438                 adev->gfx.kiq.ring.ready = false;
4439         }
4440         udelay(50);
4441 }
4442
4443 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4444 {
4445         const struct gfx_firmware_header_v1_0 *mec_hdr;
4446         const __le32 *fw_data;
4447         unsigned i, fw_size;
4448
4449         if (!adev->gfx.mec_fw)
4450                 return -EINVAL;
4451
4452         gfx_v8_0_cp_compute_enable(adev, false);
4453
4454         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4455         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4456
4457         fw_data = (const __le32 *)
4458                 (adev->gfx.mec_fw->data +
4459                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4460         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4461
4462         /* MEC1 */
4463         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4464         for (i = 0; i < fw_size; i++)
4465                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4466         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4467
4468         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4469         if (adev->gfx.mec2_fw) {
4470                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4471
4472                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4473                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4474
4475                 fw_data = (const __le32 *)
4476                         (adev->gfx.mec2_fw->data +
4477                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4478                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4479
4480                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4481                 for (i = 0; i < fw_size; i++)
4482                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4483                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4484         }
4485
4486         return 0;
4487 }
4488
4489 /* KIQ functions */
/* Tell the RLC scheduler which me/pipe/queue is the Kernel Interface
 * Queue (KIQ).  The queue id is written first, then latched with a
 * second write that additionally sets the 0x80 bit.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4503
/* Use the KIQ to bring up all kernel compute queues: emit a
 * SET_RESOURCES packet with the usable queue mask, a MAP_QUEUES packet
 * per compute ring, and a scratch-register write whose completion is
 * polled to confirm the KIQ processed the whole sequence.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the bitmask of all usable MEC queues */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	/* room for SET_RESOURCES, per-ring MAP_QUEUES and the
	 * completion scratch write */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	/* poll until the KIQ has executed the scratch write */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
4589
4590 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4591 {
4592         int i, r = 0;
4593
4594         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4595                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4596                 for (i = 0; i < adev->usec_timeout; i++) {
4597                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4598                                 break;
4599                         udelay(1);
4600                 }
4601                 if (i == adev->usec_timeout)
4602                         r = -ETIMEDOUT;
4603         }
4604         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4605         WREG32(mmCP_HQD_PQ_RPTR, 0);
4606         WREG32(mmCP_HQD_PQ_WPTR, 0);
4607
4608         return r;
4609 }
4610
/* Populate the memory queue descriptor (MQD) for a compute ring.
 * All values are staged in ring->mqd_ptr; the hardware HQD registers
 * are later programmed from this descriptor (see gfx_v8_0_mqd_commit).
 * Register reads here only capture current defaults as a base for the
 * staged values.  Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* the dynamic CU mask lives in the same allocation as the MQD */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: carry over the current hardware values */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4753
/*
 * gfx_v8_0_mqd_commit - write a prepared MQD into the HQD registers
 *
 * @adev: amdgpu device
 * @mqd: memory queue descriptor to program; the caller must already have
 *       selected the target me/pipe/queue via SRBM (see the kiq init path)
 *
 * The vi_mqd struct mirrors the HQD register file starting at
 * mmCP_MQD_BASE_ADDR, so registers are programmed by indexing into the
 * struct as an array of dwords.  CP_HQD_ACTIVE is covered only by the
 * final loop, so the queue goes active after everything else is set.
 *
 * Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	/* program the remaining HQD registers after the EOP block */
	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD (CP_HQD_ACTIVE is written last in this range) */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4790
/*
 * gfx_v8_0_kiq_init_queue - initialize (or re-init) the KIQ ring's MQD
 *
 * On GPU reset the MQD is restored from the CPU-side backup and
 * re-committed to the hardware; on first init the MQD is built with
 * gfx_v8_0_mqd_init(), committed, and then backed up so a later reset
 * can restore it.  Backup slot AMDGPU_MAX_COMPUTE_RINGS is reserved for
 * the KIQ, after the per-KCQ slots.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		/* select this queue's me/pipe/queue before touching HQD regs */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		/* all-ones dynamic CU/RB masks (nothing masked off) */
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a CPU copy so a GPU reset can restore the MQD */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4829
/*
 * gfx_v8_0_kcq_init_queue - initialize (or re-init) a compute ring's MQD
 *
 * First init builds the MQD under SRBM selection and saves a CPU-side
 * backup; GPU reset restores the MQD from that backup and clears the
 * ring; resume from suspend only clears the ring.  Unlike the KIQ path,
 * the MQD is not committed to the HQD registers here - the queues are
 * enabled afterwards via gfx_v8_0_kiq_kcq_enable() (see kiq_resume).
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot index: this ring's position in compute_ring[] */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
		/* first init: build the MQD from scratch */
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		/* all-ones dynamic CU/RB masks (nothing masked off) */
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a CPU copy so a GPU reset can restore the MQD */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		/* resume from suspend: only the ring buffer is cleared */
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4860
/* Program the MEC doorbell aperture (on ASICs ordered after Tonga in the
 * asic_type enumeration) and globally enable CP doorbells.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4870
/*
 * gfx_v8_0_kiq_resume - bring up the KIQ and all compute rings
 *
 * Enables the compute CP, initializes the KIQ MQD and every KCQ MQD
 * (each ring's MQD BO is reserved and kmapped only for the duration of
 * its init), programs the MEC doorbell range, asks the KIQ to enable
 * the KCQs, and finally ring-tests the KIQ and each compute ring,
 * marking rings ready on success.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	/* map the KIQ MQD BO only while initializing the queue */
	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs.  All rings are tested even after a failure; note that
	 * only the last ring's test result survives in r.
	 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
4938
/*
 * gfx_v8_0_cp_resume - load CP microcode (when needed) and start the rings
 *
 * With powerplay disabled, CP microcode is either loaded directly
 * (legacy path) or its SMU-driven load is verified per ucode id; Topaz
 * always loads compute microcode directly.  Afterwards the gfx ring and
 * the KIQ/compute rings are resumed and the GUI idle interrupt is
 * re-enabled.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the fw; just verify each image finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			/* Topaz loads compute microcode directly even here */
			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
										 AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
4997
/* Enable or disable both command processors (gfx first, then compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5003
/*
 * gfx_v8_0_hw_init - hw init callback for the GFX IP block
 *
 * Programs the golden registers and base GPU configuration, brings up
 * the RLC, then resumes the command processors.  Returns 0 on success
 * or the first failing step's error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the RLC must be running before the CP can be brought up */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5020
/*
 * gfx_v8_0_hw_fini - hw teardown callback for the GFX IP block
 *
 * Releases the privileged register/instruction fault interrupts.  Under
 * SR-IOV the VF skips hardware teardown entirely; otherwise the CP and
 * RLC are stopped and GFX power gating is released.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5039
5040 static int gfx_v8_0_suspend(void *handle)
5041 {
5042         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5043         adev->gfx.in_suspend = true;
5044         return gfx_v8_0_hw_fini(adev);
5045 }
5046
5047 static int gfx_v8_0_resume(void *handle)
5048 {
5049         int r;
5050         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5051
5052         r = gfx_v8_0_hw_init(adev);
5053         adev->gfx.in_suspend = false;
5054         return r;
5055 }
5056
5057 static bool gfx_v8_0_is_idle(void *handle)
5058 {
5059         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5060
5061         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5062                 return false;
5063         else
5064                 return true;
5065 }
5066
5067 static int gfx_v8_0_wait_for_idle(void *handle)
5068 {
5069         unsigned i;
5070         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5071
5072         for (i = 0; i < adev->usec_timeout; i++) {
5073                 if (gfx_v8_0_is_idle(handle))
5074                         return 0;
5075
5076                 udelay(1);
5077         }
5078         return -ETIMEDOUT;
5079 }
5080
/*
 * gfx_v8_0_check_soft_reset - decide whether GFX needs a soft reset
 *
 * Inspects the busy bits in GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS
 * and builds the corresponding GRBM/SRBM soft-reset masks.  The masks
 * are cached in adev->gfx.{grbm,srbm}_soft_reset for the pre/soft/post
 * reset stages.  Returns true when any reset bit was requested.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy gfx pipeline unit -> reset CP + GFX + GRBM */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC and the per-CP-engine busy bits */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS: pending GRBM requests and the semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5142
5143 static int gfx_v8_0_pre_soft_reset(void *handle)
5144 {
5145         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5146         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5147
5148         if ((!adev->gfx.grbm_soft_reset) &&
5149             (!adev->gfx.srbm_soft_reset))
5150                 return 0;
5151
5152         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5153         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5154
5155         /* stop the rlc */
5156         gfx_v8_0_rlc_stop(adev);
5157
5158         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5159             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5160                 /* Disable GFX parsing/prefetching */
5161                 gfx_v8_0_cp_gfx_enable(adev, false);
5162
5163         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5164             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5165             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5166             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5167                 int i;
5168
5169                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5170                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5171
5172                         mutex_lock(&adev->srbm_mutex);
5173                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5174                         gfx_v8_0_deactivate_hqd(adev, 2);
5175                         vi_srbm_select(adev, 0, 0, 0, 0);
5176                         mutex_unlock(&adev->srbm_mutex);
5177                 }
5178                 /* Disable MEC parsing/prefetching */
5179                 gfx_v8_0_cp_compute_enable(adev, false);
5180         }
5181
5182        return 0;
5183 }
5184
/*
 * gfx_v8_0_soft_reset - pulse the cached GRBM/SRBM soft-reset bits
 *
 * When either mask from gfx_v8_0_check_soft_reset() is non-zero: stall
 * GFX via GMCON_DEBUG, assert the GRBM then SRBM reset bits for ~50us
 * each and deassert them, release the stall, and let things settle.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall GFX before touching the reset registers */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert, wait, then deassert the GRBM reset bits */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same set/wait/clear sequence for the SRBM bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GFX stall */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5246
/*
 * gfx_v8_0_post_soft_reset - bring engines back up after a soft reset
 *
 * Mirrors gfx_v8_0_pre_soft_reset(): resumes the gfx CP when it was
 * reset; when any CP engine was reset, deactivates the compute HQDs
 * again and resumes the KIQ/compute queues.  Finally restarts the RLC.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			/* make sure each HQD is inactive before resuming */
			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5284
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* the mutex serializes concurrent capture/read sequences */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	/* latch the counter, then read the two 32-bit halves */
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
5304
/*
 * gfx_v8_0_ring_emit_gds_switch - emit GDS/GWS/OA setup for a vmid
 *
 * Emits WRITE_DATA packets that program the per-vmid GDS memory
 * base/size, GWS base/size and OA mask registers.  The incoming values
 * are converted to hardware units with the AMDGPU_*_SHIFT factors
 * first.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size in the upper field, base in the lower */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5352
5353 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5354 {
5355         WREG32(mmSQ_IND_INDEX,
5356                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5357                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5358                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5359                 (SQ_IND_INDEX__FORCE_READ_MASK));
5360         return RREG32(mmSQ_IND_DATA);
5361 }
5362
5363 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5364                            uint32_t wave, uint32_t thread,
5365                            uint32_t regno, uint32_t num, uint32_t *out)
5366 {
5367         WREG32(mmSQ_IND_INDEX,
5368                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5369                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5370                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5371                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5372                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5373                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5374         while (num--)
5375                 *(out++) = RREG32(mmSQ_IND_DATA);
5376 }
5377
5378 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5379 {
5380         /* type 0 wave data */
5381         dst[(*no_fields)++] = 0;
5382         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5383         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5384         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5385         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5386         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5387         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5388         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5389         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5390         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5391         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5392         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5393         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5394         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5395         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5396         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5397         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5398         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5399         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5400 }
5401
5402 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5403                                      uint32_t wave, uint32_t start,
5404                                      uint32_t size, uint32_t *dst)
5405 {
5406         wave_read_regs(
5407                 adev, simd, wave, 0,
5408                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5409 }
5410
5411
/* GFX helper callbacks installed into adev->gfx.funcs at early init. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5418
/* One-time setup of the ring counts and the gfx/ring/irq/gds/rlc
 * function tables for this IP version.  Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5433
/*
 * gfx_v8_0_late_init - interrupt enable and IB-dependent init
 *
 * Enables the privileged register/instruction fault interrupts, runs
 * the EDC GPR workarounds (which need the IB pool, hence late init),
 * then gates GFX power.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5457
/* Toggle static per-CU power gating; Polaris11/12 additionally notify
 * the SMU through powerplay before the RLC bit is flipped.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5471
/* Toggle dynamic per-CU powergating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5477
/* Toggle "quick" powergating via RLC_PG_CNTL (Polaris11-class parts). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5483
/* Toggle coarse-grain GFX powergating via RLC_PG_CNTL (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5489
/*
 * Toggle GFX pipeline powergating.  When disabling, a GFX register is
 * read afterwards so the GFX block wakes back up.
 */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5499
5500 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5501                                           bool enable)
5502 {
5503         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5504                 cz_enable_gfx_cg_power_gating(adev, true);
5505                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5506                         cz_enable_gfx_pipeline_power_gating(adev, true);
5507         } else {
5508                 cz_enable_gfx_cg_power_gating(adev, false);
5509                 cz_enable_gfx_pipeline_power_gating(adev, false);
5510         }
5511 }
5512
5513 static int gfx_v8_0_set_powergating_state(void *handle,
5514                                           enum amd_powergating_state state)
5515 {
5516         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5517         bool enable = (state == AMD_PG_STATE_GATE);
5518
5519         if (amdgpu_sriov_vf(adev))
5520                 return 0;
5521
5522         switch (adev->asic_type) {
5523         case CHIP_CARRIZO:
5524         case CHIP_STONEY:
5525
5526                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5527                         cz_enable_sck_slow_down_on_power_up(adev, true);
5528                         cz_enable_sck_slow_down_on_power_down(adev, true);
5529                 } else {
5530                         cz_enable_sck_slow_down_on_power_up(adev, false);
5531                         cz_enable_sck_slow_down_on_power_down(adev, false);
5532                 }
5533                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5534                         cz_enable_cp_power_gating(adev, true);
5535                 else
5536                         cz_enable_cp_power_gating(adev, false);
5537
5538                 cz_update_gfx_cg_power_gating(adev, enable);
5539
5540                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5541                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5542                 else
5543                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5544
5545                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5546                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5547                 else
5548                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5549                 break;
5550         case CHIP_POLARIS11:
5551         case CHIP_POLARIS12:
5552                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5553                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5554                 else
5555                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5556
5557                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5558                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5559                 else
5560                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5561
5562                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5563                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5564                 else
5565                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5566                 break;
5567         default:
5568                 break;
5569         }
5570
5571         return 0;
5572 }
5573
/**
 * gfx_v8_0_get_clockgating_state - report which GFX clockgating features are active
 * @handle: amdgpu_device pointer (passed as void *)
 * @flags: AMD_CG_SUPPORT_* bits are OR'ed in for each feature found enabled
 *
 * Reads the RLC, CGTS and CP clock/light-sleep control registers and sets
 * the corresponding AMD_CG_SUPPORT_GFX_* bits in @flags.  Under SR-IOV
 * @flags is first cleared (the registers are still read afterwards).
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG: active when the CPF override is clear */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS: active when the override is clear */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS: RLC memory light sleep implies MGLS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS: CP memory light sleep implies MGLS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5615
/**
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM command over the RLC SERDES
 * @adev: amdgpu device pointer
 * @reg_addr: BPM register address placed in the REG_ADDR field
 * @cmd: command value placed in the BPM_DATA field
 *
 * Selects all SEs/SHs and addresses every CU and non-CU SERDES master,
 * then rewrites RLC_SERDES_WR_CTRL with the given command/address pair.
 * On Stoney a narrower set of control bits is cleared before the new
 * command is OR'ed in (the BPM_DATA/REG_ADDR fields are left untouched
 * by the clear step there).
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to all SE/SH/CU instances */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5656
/* RLC safe-mode message values and RLC_GPR_REG2 request/message field layout */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5663
/**
 * iceland_enter_rlc_safe_mode - request RLC safe mode
 * @adev: amdgpu device pointer
 *
 * No-op when the RLC F32 core is not running.  If CGCG or MGCG is
 * enabled, sends the enter-safe-mode request (CMD set, MESSAGE = 1)
 * through RLC_SAFE_MODE, polls until both the GFX clock and power status
 * bits assert, waits for the RLC to acknowledge by clearing the CMD bit,
 * and records that safe mode is active.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for both GFX clock and power status to assert */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack the command (CMD bit clears) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5697
/**
 * iceland_exit_rlc_safe_mode - release RLC safe mode
 * @adev: amdgpu device pointer
 *
 * No-op when the RLC F32 core is not running.  If safe mode was
 * previously entered, sends the exit request (CMD set, MESSAGE field
 * cleared) and clears the in_safe_mode flag.  The final poll for the
 * CMD bit runs regardless of whether an exit message was actually sent.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to ack the command (CMD bit clears) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5722
/* RLC safe-mode entry/exit callbacks */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5727
/**
 * gfx_v8_0_update_medium_grain_clock_gating - toggle GFX MGCG/MGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable medium-grain clockgating, false to disable
 *
 * Programs RLC/CP memory light sleep, the RLC MGCG override register and
 * the CGTS (tree shade) controls, issuing SERDES BPM commands to clear or
 * set the MGCG override.  The whole sequence runs with the RLC held in
 * safe mode; the numbered comments mark the required programming order.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			/* APUs keep the GRBM override bit in place */
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* LS override is only lifted when both MGLS and
			 * CGTS_LS are enabled
			 */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5831
/**
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle GFX CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse-grain clockgating, false to disable
 *
 * Clears or sets the CGCG/CGLS overrides via SERDES BPM commands and
 * programs RLC_CGCG_CGLS_CTRL accordingly.  GUI idle interrupts are
 * disabled while gating is being turned off and re-enabled afterwards.
 * The whole sequence runs with the RLC held in safe mode.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
/**
 * gfx_v8_0_update_gfx_clock_gating - toggle medium- and coarse-grain gating
 * @adev: amdgpu device pointer
 * @enable: true to enable, false to disable
 *
 * Ordering matters: when enabling, MGCG/MGLS is turned on before
 * CGCG/CGLS; when disabling, CGCG/CGLS is turned off first.
 *
 * Returns 0.
 */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * ===  MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * ===  CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
5942
5943 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5944                                           enum amd_clockgating_state state)
5945 {
5946         uint32_t msg_id, pp_state = 0;
5947         uint32_t pp_support_state = 0;
5948         void *pp_handle = adev->powerplay.pp_handle;
5949
5950         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5951                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5952                         pp_support_state = PP_STATE_SUPPORT_LS;
5953                         pp_state = PP_STATE_LS;
5954                 }
5955                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5956                         pp_support_state |= PP_STATE_SUPPORT_CG;
5957                         pp_state |= PP_STATE_CG;
5958                 }
5959                 if (state == AMD_CG_STATE_UNGATE)
5960                         pp_state = 0;
5961
5962                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5963                                 PP_BLOCK_GFX_CG,
5964                                 pp_support_state,
5965                                 pp_state);
5966                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5967         }
5968
5969         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5970                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5971                         pp_support_state = PP_STATE_SUPPORT_LS;
5972                         pp_state = PP_STATE_LS;
5973                 }
5974
5975                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5976                         pp_support_state |= PP_STATE_SUPPORT_CG;
5977                         pp_state |= PP_STATE_CG;
5978                 }
5979
5980                 if (state == AMD_CG_STATE_UNGATE)
5981                         pp_state = 0;
5982
5983                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5984                                 PP_BLOCK_GFX_MG,
5985                                 pp_support_state,
5986                                 pp_state);
5987                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5988         }
5989
5990         return 0;
5991 }
5992
5993 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5994                                           enum amd_clockgating_state state)
5995 {
5996
5997         uint32_t msg_id, pp_state = 0;
5998         uint32_t pp_support_state = 0;
5999         void *pp_handle = adev->powerplay.pp_handle;
6000
6001         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6002                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6003                         pp_support_state = PP_STATE_SUPPORT_LS;
6004                         pp_state = PP_STATE_LS;
6005                 }
6006                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6007                         pp_support_state |= PP_STATE_SUPPORT_CG;
6008                         pp_state |= PP_STATE_CG;
6009                 }
6010                 if (state == AMD_CG_STATE_UNGATE)
6011                         pp_state = 0;
6012
6013                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6014                                 PP_BLOCK_GFX_CG,
6015                                 pp_support_state,
6016                                 pp_state);
6017                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6018         }
6019
6020         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6021                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6022                         pp_support_state = PP_STATE_SUPPORT_LS;
6023                         pp_state = PP_STATE_LS;
6024                 }
6025                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6026                         pp_support_state |= PP_STATE_SUPPORT_CG;
6027                         pp_state |= PP_STATE_CG;
6028                 }
6029                 if (state == AMD_CG_STATE_UNGATE)
6030                         pp_state = 0;
6031
6032                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6033                                 PP_BLOCK_GFX_3D,
6034                                 pp_support_state,
6035                                 pp_state);
6036                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6037         }
6038
6039         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6040                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6041                         pp_support_state = PP_STATE_SUPPORT_LS;
6042                         pp_state = PP_STATE_LS;
6043                 }
6044
6045                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6046                         pp_support_state |= PP_STATE_SUPPORT_CG;
6047                         pp_state |= PP_STATE_CG;
6048                 }
6049
6050                 if (state == AMD_CG_STATE_UNGATE)
6051                         pp_state = 0;
6052
6053                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6054                                 PP_BLOCK_GFX_MG,
6055                                 pp_support_state,
6056                                 pp_state);
6057                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6058         }
6059
6060         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6061                 pp_support_state = PP_STATE_SUPPORT_LS;
6062
6063                 if (state == AMD_CG_STATE_UNGATE)
6064                         pp_state = 0;
6065                 else
6066                         pp_state = PP_STATE_LS;
6067
6068                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6069                                 PP_BLOCK_GFX_RLC,
6070                                 pp_support_state,
6071                                 pp_state);
6072                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6073         }
6074
6075         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6076                 pp_support_state = PP_STATE_SUPPORT_LS;
6077
6078                 if (state == AMD_CG_STATE_UNGATE)
6079                         pp_state = 0;
6080                 else
6081                         pp_state = PP_STATE_LS;
6082                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6083                         PP_BLOCK_GFX_CP,
6084                         pp_support_state,
6085                         pp_state);
6086                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6087         }
6088
6089         return 0;
6090 }
6091
6092 static int gfx_v8_0_set_clockgating_state(void *handle,
6093                                           enum amd_clockgating_state state)
6094 {
6095         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6096
6097         if (amdgpu_sriov_vf(adev))
6098                 return 0;
6099
6100         switch (adev->asic_type) {
6101         case CHIP_FIJI:
6102         case CHIP_CARRIZO:
6103         case CHIP_STONEY:
6104                 gfx_v8_0_update_gfx_clock_gating(adev,
6105                                                  state == AMD_CG_STATE_GATE);
6106                 break;
6107         case CHIP_TONGA:
6108                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6109                 break;
6110         case CHIP_POLARIS10:
6111         case CHIP_POLARIS11:
6112         case CHIP_POLARIS12:
6113                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6114                 break;
6115         default:
6116                 break;
6117         }
6118         return 0;
6119 }
6120
6121 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6122 {
6123         return ring->adev->wb.wb[ring->rptr_offs];
6124 }
6125
6126 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6127 {
6128         struct amdgpu_device *adev = ring->adev;
6129
6130         if (ring->use_doorbell)
6131                 /* XXX check if swapping is necessary on BE */
6132                 return ring->adev->wb.wb[ring->wptr_offs];
6133         else
6134                 return RREG32(mmCP_RB0_WPTR);
6135 }
6136
6137 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6138 {
6139         struct amdgpu_device *adev = ring->adev;
6140
6141         if (ring->use_doorbell) {
6142                 /* XXX check if swapping is necessary on BE */
6143                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6144                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6145         } else {
6146                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6147                 (void)RREG32(mmCP_RB0_WPTR);
6148         }
6149 }
6150
/**
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 * @ring: amdgpu ring pointer
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the per-client done bit matches.  Compute/KIQ
 * rings use the CP2/CP6 done bits shifted by pipe (any other ME emits
 * nothing); the gfx ring uses CP0 with the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6183
/* Flush the VGT: partial-flush the VS stage first, then emit the
 * VGT_FLUSH event itself.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
                EVENT_INDEX(4));

        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
                EVENT_INDEX(0));
}
6194
6195
/* Invalidate the HDP read cache by writing HDP_DEBUG0 through the CP. */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | /* ME */
                                 WRITE_DATA_DST_SEL(0) |    /* register */
                                 WR_CONFIRM));
        amdgpu_ring_write(ring, mmHDP_DEBUG0);
        amdgpu_ring_write(ring, 0); /* upper address bits, unused for registers */
        amdgpu_ring_write(ring, 1); /* value written */

}
6207
/* Schedule an indirect buffer on the gfx ring.  CE IBs use the CONST
 * packet variant; under SR-IOV a preemptible DE IB also gets the
 * preamble-enable bit set and de-meta data emitted ahead of it.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
                                      struct amdgpu_ib *ib,
                                      unsigned vm_id, bool ctx_switch)
{
        u32 header, control = 0;

        if (ib->flags & AMDGPU_IB_FLAG_CE)
                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        else
                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

        /* IB length in dwords, VMID in bits 24+ */
        control |= ib->length_dw | (vm_id << 24);

        if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
                control |= INDIRECT_BUFFER_PRE_ENB(1);

                if (!(ib->flags & AMDGPU_IB_FLAG_CE))
                        gfx_v8_0_ring_emit_de_meta(ring);
        }

        amdgpu_ring_write(ring, header);
        amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) | /* byte-swap IB fetches on big-endian hosts */
#endif
                          (ib->gpu_addr & 0xFFFFFFFC)); /* IB must be 4-byte aligned */
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
}
6237
/* Schedule an indirect buffer on a compute ring: plain INDIRECT_BUFFER
 * packet with the VALID bit, IB length, and VMID packed into control.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
                                          struct amdgpu_ib *ib,
                                          unsigned vm_id, bool ctx_switch)
{
        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
                                (2 << 0) | /* byte-swap IB fetches on big-endian hosts */
#endif
                                (ib->gpu_addr & 0xFFFFFFFC)); /* IB must be 4-byte aligned */
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
}
6253
/* Emit a gfx fence: EVENT_WRITE_EOP flushes the TC/TCL1 caches, writes
 * @seq (32 or 64 bit per @flags) to @addr and optionally raises an
 * interrupt once the pipeline has drained.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* EVENT_WRITE_EOP - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, addr & 0xfffffffc); /* fence address, dword aligned */
        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
                          DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));

}
6274
/* Stall the ring (PFP for gfx, ME otherwise) until the ring's fence
 * memory equals sync_seq, i.e. all previously submitted work is done.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;

        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
                                 WAIT_REG_MEM_FUNCTION(3) | /* equal */
                                 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
        amdgpu_ring_write(ring, seq); /* reference value */
        amdgpu_ring_write(ring, 0xffffffff); /* compare mask: all bits */
        amdgpu_ring_write(ring, 4); /* poll interval */
}
6291
/* Switch VM context @vm_id to the page directory at @pd_addr, then
 * invalidate its TLB entries and wait for the invalidation to finish.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                        unsigned vm_id, uint64_t pd_addr)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

        /* update the page table base address for this VMID */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
                                 WRITE_DATA_DST_SEL(0)) |
                                 WR_CONFIRM);
        /* contexts 0-7 and 8-15 live in two separate register banks */
        if (vm_id < 8) {
                amdgpu_ring_write(ring,
                                  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
        } else {
                amdgpu_ring_write(ring,
                                  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
        }
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, pd_addr >> 12); /* register holds a page frame number */

        /* bits 0-15 are the VM contexts0-15 */
        /* invalidate the cache */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 1 << vm_id);

        /* wait for the invalidate to complete */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
                                 WAIT_REG_MEM_ENGINE(0))); /* me */
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0); /* ref */
        amdgpu_ring_write(ring, 0); /* mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */

        /* compute doesn't have PFP */
        if (usepfp) {
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
        }
}
6338
6339 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6340 {
6341         return ring->adev->wb.wb[ring->wptr_offs];
6342 }
6343
/* Publish the compute ring write pointer: mirror it in the writeback
 * slot, then ring the doorbell (compute rings always use doorbells).
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        /* XXX check if swapping is necessary on BE */
        adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
        WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
6352
/* Emit a compute fence: RELEASE_MEM flushes the TC/TCL1 caches, writes
 * @seq to @addr and optionally raises an interrupt.  Note the packet
 * layout differs from the gfx EVENT_WRITE_EOP: data/int select comes
 * before the address here.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                             u64 addr, u64 seq,
                                             unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, addr & 0xfffffffc); /* fence address, dword aligned */
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));
}
6373
/* Emit a KIQ fence: write the 32-bit @seq to @addr with WRITE_DATA,
 * then, if requested, poke CPC_INT_STATUS to trigger the GENERIC2
 * interrupt.  64-bit sequence numbers are not supported on KIQ.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned int flags)
{
        /* we only allocate 32bit for each seq wb address */
        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        /* write fence seq to the "addr" */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));

        if (flags & AMDGPU_FENCE_FLAG_INT) {
                /* set register to trigger INT */
                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
                amdgpu_ring_write(ring, 0);
                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
        }
}
6398
/* Emit a SWITCH_BUFFER packet (flips the CE/DE buffer pair). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
        amdgpu_ring_write(ring, 0);
}
6404
/* Emit a CONTEXT_CONTROL packet whose load bits depend on whether this
 * submission performs a context switch and/or carries a preamble IB.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
        uint32_t dw2 = 0;

        if (amdgpu_sriov_vf(ring->adev))
                gfx_v8_0_ring_emit_ce_meta(ring);

        dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
                gfx_v8_0_ring_emit_vgt_flush(ring);
                /* set load_global_config & load_global_uconfig */
                dw2 |= 0x8001;
                /* set load_cs_sh_regs */
                dw2 |= 0x01000000;
                /* set load_per_context_state & load_gfx_sh_regs for GFX */
                dw2 |= 0x10002;

                /* set load_ce_ram if preamble presented */
                if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
                        dw2 |= 0x10000000;
        } else {
                /* still load_ce_ram if this is the first time preamble presented
                 * although there is no context switch happens.
                 */
                if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
                        dw2 |= 0x10000000;
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, dw2);
        amdgpu_ring_write(ring, 0);
}
6437
/* Emit a COND_EXEC packet and return the ring offset of its size
 * dword, which gfx_v8_0_ring_emit_patch_cond_exec() later patches
 * with the real number of dwords to conditionally skip.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
        unsigned ret;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
        ret = ring->wptr & ring->buf_mask;
        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
        return ret;
}
6450
/* Patch the COND_EXEC size dword at @offset with the dword count from
 * the packet up to the current wptr, accounting for ring wrap-around.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
        unsigned cur;

        BUG_ON(offset > ring->buf_mask);
        BUG_ON(ring->ring[offset] != 0x55aa55aa); /* slot must still hold the dummy */

        cur = (ring->wptr & ring->buf_mask) - 1;
        if (likely(cur > offset))
                ring->ring[offset] = cur - offset;
        else
                /* wptr wrapped past the end of the ring buffer */
                ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6464
/* Emit a COPY_DATA packet copying register @reg into the SR-IOV
 * register-value writeback slot so the driver can read it back.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
        struct amdgpu_device *adev = ring->adev;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register */
                                (5 << 8) |      /* dst: memory */
                                (1 << 20));     /* write confirm */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        /* destination: wb slot reserved for register readback, in bytes */
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
}
6480
/* Emit a WRITE_DATA packet writing @val to register @reg. */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
                                  uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0); /* upper address bits, unused for registers */
        amdgpu_ring_write(ring, val);
}
6490
6491 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6492                                                  enum amdgpu_interrupt_state state)
6493 {
6494         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6495                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6496 }
6497
/* Enable or disable the end-of-pipe timestamp interrupt for one
 * MEC1 pipe.
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
                                                     int me, int pipe,
                                                     enum amdgpu_interrupt_state state)
{
        u32 mec_int_cntl, mec_int_cntl_reg;

        /*
         * amdgpu controls only the first MEC. That's why this function only
         * handles the setting of interrupts for this specific MEC. All other
         * pipes' interrupts are set by amdkfd.
         */

        if (me == 1) {
                switch (pipe) {
                case 0:
                        mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
                        break;
                case 1:
                        mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
                        break;
                case 2:
                        mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
                        break;
                case 3:
                        mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
                        break;
                default:
                        DRM_DEBUG("invalid pipe %d\n", pipe);
                        return;
                }
        } else {
                DRM_DEBUG("invalid me %d\n", me);
                return;
        }

        /* read-modify-write the pipe's TIME_STAMP_INT_ENABLE bit.
         * NOTE(review): the CP_INT_CNTL_RING0 mask is reused here for the
         * ME1_PIPEx registers — presumably the bit layout matches; confirm
         * against the gfx_8_0 register headers.
         */
        switch (state) {
        case AMDGPU_IRQ_STATE_DISABLE:
                mec_int_cntl = RREG32(mec_int_cntl_reg);
                mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
                WREG32(mec_int_cntl_reg, mec_int_cntl);
                break;
        case AMDGPU_IRQ_STATE_ENABLE:
                mec_int_cntl = RREG32(mec_int_cntl_reg);
                mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
                WREG32(mec_int_cntl_reg, mec_int_cntl);
                break;
        default:
                break;
        }
}
6548
6549 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6550                                              struct amdgpu_irq_src *source,
6551                                              unsigned type,
6552                                              enum amdgpu_interrupt_state state)
6553 {
6554         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6555                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6556
6557         return 0;
6558 }
6559
6560 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6561                                               struct amdgpu_irq_src *source,
6562                                               unsigned type,
6563                                               enum amdgpu_interrupt_state state)
6564 {
6565         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6566                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6567
6568         return 0;
6569 }
6570
/* Dispatch an EOP interrupt state change to the right ring: the gfx
 * ring or one of the MEC1/MEC2 compute pipes.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
                                            struct amdgpu_irq_src *src,
                                            unsigned type,
                                            enum amdgpu_interrupt_state state)
{
        switch (type) {
        case AMDGPU_CP_IRQ_GFX_EOP:
                gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
                break;
        default:
                /* unknown type: silently ignored */
                break;
        }
        return 0;
}
6609
/* EOP interrupt handler: decode ME/pipe/queue from the IV ring_id and
 * run fence processing on the matching ring.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
                            struct amdgpu_irq_src *source,
                            struct amdgpu_iv_entry *entry)
{
        int i;
        u8 me_id, pipe_id, queue_id;
        struct amdgpu_ring *ring;

        DRM_DEBUG("IH: CP EOP\n");
        /* ring_id encoding: bits 2-3 = me, bits 0-1 = pipe, bits 4-6 = queue */
        me_id = (entry->ring_id & 0x0c) >> 2;
        pipe_id = (entry->ring_id & 0x03) >> 0;
        queue_id = (entry->ring_id & 0x70) >> 4;

        switch (me_id) {
        case 0:
                /* ME0 is the gfx engine */
                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
                break;
        case 1:
        case 2:
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        ring = &adev->gfx.compute_ring[i];
                        /* Per-queue interrupt is supported for MEC starting from VI.
                         * The interrupt can only be enabled/disabled per pipe instead
                         * of per queue.
                         */
                        if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
                                amdgpu_fence_process(ring);
                }
                break;
        }
        return 0;
}
6641
/* Privileged-register fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6650
/* Privileged-instruction fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6659
/* Enable or disable the KIQ GENERIC2 interrupt, both in the global
 * CPC_INT_CNTL and in the INT_CNTL register of the KIQ ring's pipe.
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
                                            struct amdgpu_irq_src *src,
                                            unsigned int type,
                                            enum amdgpu_interrupt_state state)
{
        struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

        switch (type) {
        case AMDGPU_CP_KIQ_IRQ_DRIVER0:
                WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
                             state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
                /* per-pipe register is indexed off the PIPE0 base for this ME */
                if (ring->me == 1)
                        WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
                                     ring->pipe,
                                     GENERIC2_INT_ENABLE,
                                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
                else
                        WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
                                     ring->pipe,
                                     GENERIC2_INT_ENABLE,
                                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
                break;
        default:
                BUG(); /* kiq only support GENERIC2_INT now */
                break;
        }
        return 0;
}
6688
/* KIQ GENERIC2 interrupt handler: decode the source for debugging and
 * process fences on the KIQ ring.
 */
static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
                            struct amdgpu_irq_src *source,
                            struct amdgpu_iv_entry *entry)
{
        u8 me_id, pipe_id, queue_id;
        struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

        /* ring_id encoding: bits 2-3 = me, bits 0-1 = pipe, bits 4-6 = queue */
        me_id = (entry->ring_id & 0x0c) >> 2;
        pipe_id = (entry->ring_id & 0x03) >> 0;
        queue_id = (entry->ring_id & 0x70) >> 4;
        DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
                   me_id, pipe_id, queue_id);

        amdgpu_fence_process(ring);
        return 0;
}
6705
/* AMD IP block callbacks for the GFX v8 engine. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
        .late_init = gfx_v8_0_late_init,
        .sw_init = gfx_v8_0_sw_init,
        .sw_fini = gfx_v8_0_sw_fini,
        .hw_init = gfx_v8_0_hw_init,
        .hw_fini = gfx_v8_0_hw_fini,
        .suspend = gfx_v8_0_suspend,
        .resume = gfx_v8_0_resume,
        .is_idle = gfx_v8_0_is_idle,
        .wait_for_idle = gfx_v8_0_wait_for_idle,
        .check_soft_reset = gfx_v8_0_check_soft_reset,
        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
        .soft_reset = gfx_v8_0_soft_reset,
        .post_soft_reset = gfx_v8_0_post_soft_reset,
        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
        .set_powergating_state = gfx_v8_0_set_powergating_state,
        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6726
/* Ring callbacks for the gfx (graphics) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
        .type = AMDGPU_RING_TYPE_GFX,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
        /* worst-case dword budget for one frame's fixed overhead */
        .emit_frame_size = /* maximum 215dw if count 16 IBs in */
                5 +  /* COND_EXEC */
                7 +  /* PIPELINE_SYNC */
                19 + /* VM_FLUSH */
                8 +  /* FENCE for VM_FLUSH */
                20 + /* GDS switch */
                4 + /* double SWITCH_BUFFER,
                       the first COND_EXEC jump to the place just
                           prior to this double SWITCH_BUFFER  */
                5 + /* COND_EXEC */
                7 +      /*     HDP_flush */
                4 +      /*     VGT_flush */
                14 + /* CE_META */
                31 + /* DE_META */
                3 + /* CNTX_CTRL */
                5 + /* HDP_INVL */
                8 + 8 + /* FENCE x2 */
                2, /* SWITCH_BUFFER */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_switch_buffer = gfx_v8_ring_emit_sb,
        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
6770
/* Ring callbacks for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
        .type = AMDGPU_RING_TYPE_COMPUTE,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        /* worst-case dword budget for one frame's fixed overhead */
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
};
6799
/* Ring callbacks for the KIQ (kernel interface queue) ring; adds
 * register read/write emission for SR-IOV use.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        /* worst-case dword budget for one frame's fixed overhead */
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v8_0_ring_emit_rreg,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6825
6826 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6827 {
6828         int i;
6829
6830         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6831
6832         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6833                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6834
6835         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6836                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6837 }
6838
/* End-of-pipe interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
        .set = gfx_v8_0_set_eop_interrupt_state,
        .process = gfx_v8_0_eop_irq,
};

/* Privileged-register-access fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
        .set = gfx_v8_0_set_priv_reg_fault_state,
        .process = gfx_v8_0_priv_reg_irq,
};

/* Privileged-instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
        .set = gfx_v8_0_set_priv_inst_fault_state,
        .process = gfx_v8_0_priv_inst_irq,
};

/* KIQ GENERIC2 interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
        .set = gfx_v8_0_kiq_set_interrupt_state,
        .process = gfx_v8_0_kiq_irq,
};
6858
/* Register all GFX interrupt sources with the IRQ framework. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

        adev->gfx.priv_reg_irq.num_types = 1;
        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

        adev->gfx.priv_inst_irq.num_types = 1;
        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

        adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
        adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
6873
/* Hook up the RLC (run list controller) helpers.
 * NOTE(review): the table is named for Iceland but is presumably shared
 * by all VI parts handled here — confirm against its definition.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
6878
/* Initialize default GDS/GWS/OA partition sizes, chosen by the total
 * GDS memory size reported by hardware.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
        /* init asic gds info */
        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
        adev->gds.gws.total_size = 64;
        adev->gds.oa.total_size = 16;

        if (adev->gds.mem.total_size == 64 * 1024) {
                /* 64KB GDS parts */
                adev->gds.mem.gfx_partition_size = 4096;
                adev->gds.mem.cs_partition_size = 4096;

                adev->gds.gws.gfx_partition_size = 4;
                adev->gds.gws.cs_partition_size = 4;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 1;
        } else {
                adev->gds.mem.gfx_partition_size = 1024;
                adev->gds.mem.cs_partition_size = 1024;

                adev->gds.gws.gfx_partition_size = 16;
                adev->gds.gws.cs_partition_size = 16;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 4;
        }
}
6906
6907 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6908                                                  u32 bitmap)
6909 {
6910         u32 data;
6911
6912         if (!bitmap)
6913                 return;
6914
6915         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6916         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6917
6918         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6919 }
6920
6921 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6922 {
6923         u32 data, mask;
6924
6925         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6926                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6927
6928         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6929
6930         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6931 }
6932
/* Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, and the always-on (AO) CU mask.  Walks every shader
 * engine / shader array under the GRBM index mutex, since reading the
 * CU config registers requires selecting the SE/SH first.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
        int i, j, k, counter, active_cu_number = 0;
        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
        unsigned disable_masks[4 * 2];  /* one user-disable mask per (SE, SH), max 4x2 */
        u32 ao_cu_num;

        memset(cu_info, 0, sizeof(*cu_info));

        /* APUs keep fewer CUs always-on than discrete parts */
        if (adev->flags & AMD_IS_APU)
                ao_cu_num = 2;
        else
                ao_cu_num = adev->gfx.config.max_cu_per_sh;

        amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
                        /* select this SE/SH so the CU registers refer to it */
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        /* disable_masks only covers 4 SEs x 2 SHs */
                        if (i < 4 && j < 2)
                                gfx_v8_0_set_user_cu_inactive_bitmap(
                                        adev, disable_masks[i * 2 + j]);
                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
                        cu_info->bitmap[i][j] = bitmap;

                        /* count active CUs; the first ao_cu_num become always-on */
                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
                                if (bitmap & mask) {
                                        if (counter < ao_cu_num)
                                                ao_bitmap |= mask;
                                        counter ++;
                                }
                                mask <<= 1;
                        }
                        active_cu_number += counter;
                        /* packed AO mask only has room for 2 SEs x 2 SHs, 8 bits each */
                        if (i < 2 && j < 2)
                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
                        cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
                }
        }
        /* restore broadcast to all SEs/SHs before releasing the mutex */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
}
6983
/* GFX v8.0 IP block registration; shares gfx_v8_0_ip_funcs with v8.1. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 0,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
6992
/* GFX v8.1 IP block registration; reuses the v8.0 function table. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 1,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
7001
7002 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7003 {
7004         uint64_t ce_payload_addr;
7005         int cnt_ce;
7006         static union {
7007                 struct vi_ce_ib_state regular;
7008                 struct vi_ce_ib_state_chained_ib chained;
7009         } ce_payload = {};
7010
7011         if (ring->adev->virt.chained_ib_support) {
7012                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7013                                                   offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7014                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7015         } else {
7016                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7017                                                   offsetof(struct vi_gfx_meta_data, ce_payload);
7018                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7019         }
7020
7021         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7022         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7023                                 WRITE_DATA_DST_SEL(8) |
7024                                 WR_CONFIRM) |
7025                                 WRITE_DATA_CACHE_POLICY(0));
7026         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7027         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7028         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7029 }
7030
7031 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7032 {
7033         uint64_t de_payload_addr, gds_addr, csa_addr;
7034         int cnt_de;
7035         static union {
7036                 struct vi_de_ib_state regular;
7037                 struct vi_de_ib_state_chained_ib chained;
7038         } de_payload = {};
7039
7040         csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
7041         gds_addr = csa_addr + 4096;
7042         if (ring->adev->virt.chained_ib_support) {
7043                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7044                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7045                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7046                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7047         } else {
7048                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7049                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7050                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7051                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7052         }
7053
7054         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7055         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7056                                 WRITE_DATA_DST_SEL(8) |
7057                                 WR_CONFIRM) |
7058                                 WRITE_DATA_CACHE_POLICY(0));
7059         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7060         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7061         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7062 }
This page took 0.465753 seconds and 4 git commands to generate.