/* linux.git: drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include <drm/drmP.h>
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
/* Number of GFX rings and per-MEC HPD (hardware queue descriptor) area size. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 2048

/* Known-good GB_ADDR_CONFIG values per ASIC family. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/*
 * Helpers that shift a field value into its position within the
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 register layouts.
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-feature override bits for RLC_CGTT_MGCG_OVERRIDE (clockgating control). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77
/* BPM SERDES CMD: set vs. clear command selector. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX            /* number of BPM registers */
};

/* Length marker for the RLC direct-register-list format. */
#define RLC_FormatDirectRegListLength        14
93
/*
 * Firmware images required by the GFX8 (VI-family) engines:
 * CE/PFP/ME (graphics micro-engines), MEC/MEC2 (compute), RLC.
 * Declared so the firmware loader/initramfs tooling can pick them up.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
147
/*
 * Per-VMID GDS register offsets, indexed by VMID (0-15).
 * Each entry carries the {BASE, SIZE, GWS, OA} register offsets for one VMID.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
167
/*
 * Tonga A11 golden register settings: flat {reg, mask, value} triplets.
 * Applied via amdgpu_program_register_sequence() at golden-register init
 * (see gfx_v8_0_init_golden_registers below) — do not reorder or edit values.
 */
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
187
/*
 * Tonga common golden settings, {reg, mask, value} triplets: broadcast
 * GRBM_GFX_INDEX, raster config, GB_ADDR_CONFIG and SPI CU reservations.
 */
static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
199
/*
 * Tonga MGCG/CGCG (clockgating) init sequence, {reg, mask, value} triplets.
 * The repeated mmGRBM_GFX_INDEX entries re-select the broadcast index before
 * and after the per-CU CGTS writes; values are hardware-validated — keep as-is.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
278
/*
 * Polaris11 A11 golden register settings: {reg, mask, value} triplets,
 * applied at golden-register init. Values are hardware-validated.
 */
static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
299
/*
 * Polaris11 common golden settings ({reg, mask, value} triplets):
 * broadcast GRBM_GFX_INDEX, GB_ADDR_CONFIG and SPI CU reservations.
 */
static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
309
/*
 * Polaris10 A11 golden register settings: {reg, mask, value} triplets.
 * Note: unlike Polaris11, this list also tunes mmATC_MISC_CG and has no
 * TCP_CHAN_STEER_LO entry.
 */
static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
330
/*
 * Polaris10 common golden settings ({reg, mask, value} triplets):
 * broadcast index, raster config, GB_ADDR_CONFIG, SPI CU reservations.
 */
static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
342
/*
 * Fiji common golden settings ({reg, mask, value} triplets). The second
 * mmGRBM_GFX_INDEX entry re-asserts the broadcast index before the final
 * SPI_CONFIG_CNTL_1 write.
 */
static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
356
/* Fiji A10 golden register settings: {reg, mask, value} triplets. */
static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
371
/*
 * Fiji MGCG/CGCG (clockgating) init sequence, {reg, mask, value} triplets.
 * Shorter than the Tonga list: no per-CU CGTS entries, only the global
 * CGTS_SM_CTRL_REG write after the broadcast index is re-selected.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
410
/* Iceland (Topaz) A11 golden register settings: {reg, mask, value} triplets. */
static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
430
/*
 * Iceland (Topaz) common golden settings ({reg, mask, value} triplets).
 * GB_ADDR_CONFIG value matches TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
442
/*
 * Iceland (Topaz) MGCG/CGCG init sequence, {reg, mask, value} triplets.
 * Applied with amdgpu_program_register_sequence() from
 * gfx_v8_0_init_golden_registers() for CHIP_TOPAZ. Covers CU0-CU5 only,
 * with distinct TA_SQC values (0x0f840f87) for CU0/CU4.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
510
/* Carrizo A11 golden register settings: {reg, mask, value} triplets. */
static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
526
/*
 * Carrizo common golden settings ({reg, mask, value} triplets).
 * GB_ADDR_CONFIG value matches CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
538
/*
 * Carrizo MGCG/CGCG init sequence, {reg, mask, value} triplets.
 * Covers CU0-CU7; final RLC_CGCG_CGLS_CTRL value (0x0020003f) differs
 * from the Tonga/Fiji/Iceland lists (0x0020003c).
 */
static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
617
/* Stoney A11 golden register settings: {reg, mask, value} triplets. */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
631
/* Stoney common golden settings ({ reg, mask, value } triples): raster
 * config and SPI CU resource-reservation defaults applied at init.
 */
static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
643
/* Stoney medium-grain / coarse-grain clock-gating init values
 * ({ reg, mask, value } triples for amdgpu_program_register_sequence()).
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
652
/* Forward declarations for helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
661
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 *
 * Applies the clock-gating init, golden-settings and common register tables
 * that match @adev->asic_type.  Unknown ASIC types are silently ignored.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_program_register_sequence(adev,
                                                 iceland_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_iceland_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_program_register_sequence(adev,
                                                 iceland_golden_common_all,
                                                 (const u32)ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_program_register_sequence(adev,
                                                 fiji_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_fiji_a10,
                                                 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_program_register_sequence(adev,
                                                 fiji_golden_common_all,
                                                 (const u32)ARRAY_SIZE(fiji_golden_common_all));
                break;

        case CHIP_TONGA:
                amdgpu_program_register_sequence(adev,
                                                 tonga_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_tonga_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_program_register_sequence(adev,
                                                 tonga_golden_common_all,
                                                 (const u32)ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_polaris11_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_program_register_sequence(adev,
                                                 polaris11_golden_common_all,
                                                 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_polaris10_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_program_register_sequence(adev,
                                                 polaris10_golden_common_all,
                                                 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
                /* Board-specific quirk: for these Polaris10 revision-0xc7
                 * boards (matched by subsystem vendor/device) issue two I2C
                 * transactions via atombios.  NOTE(review): presumably a
                 * voltage/clock-chip tweak for these specific boards —
                 * confirm against the originating commit before touching.
                 */
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_program_register_sequence(adev,
                                                 cz_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_common_all,
                                                 (const u32)ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_program_register_sequence(adev,
                                                 stoney_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_common_all,
                                                 (const u32)ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}
750
751 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
752 {
753         adev->gfx.scratch.num_reg = 8;
754         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
755         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
756 }
757
/*
 * gfx_v8_0_ring_test_ring - basic CP ring sanity test
 *
 * Allocates a scratch register, seeds it with 0xCAFEDEAD, then submits a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it through the ring.
 * Polls (up to adev->usec_timeout microseconds) for the write to land.
 *
 * Returns 0 on success, -EINVAL on timeout, or a negative error code if
 * the scratch register or ring could not be acquired.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
                return r;
        }
        /* sentinel value: anything but 0xDEADBEEF means the CP write failed */
        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
                          ring->idx, r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }
        /* 3-dword packet: header, register offset, value to write */
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        /* busy-wait for the CP to execute the packet */
        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < adev->usec_timeout) {
                DRM_INFO("ring test on %d succeeded in %d usecs\n",
                         ring->idx, i);
        } else {
                DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
                          ring->idx, scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}
801
/*
 * gfx_v8_0_ring_test_ib - test indirect buffer submission on a ring
 *
 * Builds a 3-dword IB that writes 0xDEADBEEF to a scratch register,
 * schedules it, waits on the resulting fence (up to @timeout jiffies),
 * then verifies the register value.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signalled,
 * -EINVAL if the scratch value is wrong, or a negative error code from
 * scratch/IB allocation or scheduling.  Resources are released via the
 * goto-cleanup labels on every path.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        uint32_t scratch;
        uint32_t tmp = 0;
        long r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
                return r;
        }
        /* sentinel: overwritten with 0xDEADBEEF only if the IB executed */
        WREG32(scratch, 0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 256, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
                goto err1;
        }
        ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
        ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
        ib.ptr[2] = 0xDEADBEEF;
        ib.length_dw = 3;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                /* fence wait returned without signalling */
                DRM_ERROR("amdgpu: IB test timed out.\n");
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
                goto err2;
        }
        tmp = RREG32(scratch);
        if (tmp == 0xDEADBEEF) {
                DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
                r = 0;
        } else {
                DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
                          scratch, tmp);
                r = -EINVAL;
        }
err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}
857
858
/*
 * gfx_v8_0_free_microcode - release all GFX firmware images
 *
 * Drops every firmware reference taken by gfx_v8_0_init_microcode() and
 * NULLs the pointers.  MEC2 is only released on ASICs that request it
 * (Stoney and Topaz never load mec2), and the RLC register-list buffer
 * allocated during init is freed as well.
 */
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ))
                release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}
878
879 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
880 {
881         const char *chip_name;
882         char fw_name[30];
883         int err;
884         struct amdgpu_firmware_info *info = NULL;
885         const struct common_firmware_header *header = NULL;
886         const struct gfx_firmware_header_v1_0 *cp_hdr;
887         const struct rlc_firmware_header_v2_0 *rlc_hdr;
888         unsigned int *tmp = NULL, i;
889
890         DRM_DEBUG("\n");
891
892         switch (adev->asic_type) {
893         case CHIP_TOPAZ:
894                 chip_name = "topaz";
895                 break;
896         case CHIP_TONGA:
897                 chip_name = "tonga";
898                 break;
899         case CHIP_CARRIZO:
900                 chip_name = "carrizo";
901                 break;
902         case CHIP_FIJI:
903                 chip_name = "fiji";
904                 break;
905         case CHIP_POLARIS11:
906                 chip_name = "polaris11";
907                 break;
908         case CHIP_POLARIS10:
909                 chip_name = "polaris10";
910                 break;
911         case CHIP_POLARIS12:
912                 chip_name = "polaris12";
913                 break;
914         case CHIP_STONEY:
915                 chip_name = "stoney";
916                 break;
917         default:
918                 BUG();
919         }
920
921         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
922         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
923         if (err)
924                 goto out;
925         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
926         if (err)
927                 goto out;
928         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
929         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
930         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
931
932         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
933         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
934         if (err)
935                 goto out;
936         err = amdgpu_ucode_validate(adev->gfx.me_fw);
937         if (err)
938                 goto out;
939         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
940         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
941
942         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
943
944         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
945         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
946         if (err)
947                 goto out;
948         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
949         if (err)
950                 goto out;
951         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
952         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
953         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
954
955         /*
956          * Support for MCBP/Virtualization in combination with chained IBs is
957          * formal released on feature version #46
958          */
959         if (adev->gfx.ce_feature_version >= 46 &&
960             adev->gfx.pfp_feature_version >= 46) {
961                 adev->virt.chained_ib_support = true;
962                 DRM_INFO("Chained IB support enabled!\n");
963         } else
964                 adev->virt.chained_ib_support = false;
965
966         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
967         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
968         if (err)
969                 goto out;
970         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
971         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
972         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
973         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
974
975         adev->gfx.rlc.save_and_restore_offset =
976                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
977         adev->gfx.rlc.clear_state_descriptor_offset =
978                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
979         adev->gfx.rlc.avail_scratch_ram_locations =
980                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
981         adev->gfx.rlc.reg_restore_list_size =
982                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
983         adev->gfx.rlc.reg_list_format_start =
984                         le32_to_cpu(rlc_hdr->reg_list_format_start);
985         adev->gfx.rlc.reg_list_format_separate_start =
986                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
987         adev->gfx.rlc.starting_offsets_start =
988                         le32_to_cpu(rlc_hdr->starting_offsets_start);
989         adev->gfx.rlc.reg_list_format_size_bytes =
990                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
991         adev->gfx.rlc.reg_list_size_bytes =
992                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
993
994         adev->gfx.rlc.register_list_format =
995                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
996                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
997
998         if (!adev->gfx.rlc.register_list_format) {
999                 err = -ENOMEM;
1000                 goto out;
1001         }
1002
1003         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1004                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1005         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1006                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1007
1008         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1009
1010         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1011                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1012         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1013                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1014
1015         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1016         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1017         if (err)
1018                 goto out;
1019         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1020         if (err)
1021                 goto out;
1022         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1023         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1024         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1025
1026         if ((adev->asic_type != CHIP_STONEY) &&
1027             (adev->asic_type != CHIP_TOPAZ)) {
1028                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1029                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1030                 if (!err) {
1031                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1032                         if (err)
1033                                 goto out;
1034                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1035                                 adev->gfx.mec2_fw->data;
1036                         adev->gfx.mec2_fw_version =
1037                                 le32_to_cpu(cp_hdr->header.ucode_version);
1038                         adev->gfx.mec2_feature_version =
1039                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1040                 } else {
1041                         err = 0;
1042                         adev->gfx.mec2_fw = NULL;
1043                 }
1044         }
1045
1046         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1047                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1048                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1049                 info->fw = adev->gfx.pfp_fw;
1050                 header = (const struct common_firmware_header *)info->fw->data;
1051                 adev->firmware.fw_size +=
1052                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1053
1054                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1055                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1056                 info->fw = adev->gfx.me_fw;
1057                 header = (const struct common_firmware_header *)info->fw->data;
1058                 adev->firmware.fw_size +=
1059                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1060
1061                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1062                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1063                 info->fw = adev->gfx.ce_fw;
1064                 header = (const struct common_firmware_header *)info->fw->data;
1065                 adev->firmware.fw_size +=
1066                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1067
1068                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1069                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1070                 info->fw = adev->gfx.rlc_fw;
1071                 header = (const struct common_firmware_header *)info->fw->data;
1072                 adev->firmware.fw_size +=
1073                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1074
1075                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1076                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1077                 info->fw = adev->gfx.mec_fw;
1078                 header = (const struct common_firmware_header *)info->fw->data;
1079                 adev->firmware.fw_size +=
1080                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1081
1082                 /* we need account JT in */
1083                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1084                 adev->firmware.fw_size +=
1085                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1086
1087                 if (amdgpu_sriov_vf(adev)) {
1088                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1089                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1090                         info->fw = adev->gfx.mec_fw;
1091                         adev->firmware.fw_size +=
1092                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1093                 }
1094
1095                 if (adev->gfx.mec2_fw) {
1096                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1097                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1098                         info->fw = adev->gfx.mec2_fw;
1099                         header = (const struct common_firmware_header *)info->fw->data;
1100                         adev->firmware.fw_size +=
1101                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1102                 }
1103
1104         }
1105
1106 out:
1107         if (err) {
1108                 dev_err(adev->dev,
1109                         "gfx8: Failed to load firmware \"%s\"\n",
1110                         fw_name);
1111                 release_firmware(adev->gfx.pfp_fw);
1112                 adev->gfx.pfp_fw = NULL;
1113                 release_firmware(adev->gfx.me_fw);
1114                 adev->gfx.me_fw = NULL;
1115                 release_firmware(adev->gfx.ce_fw);
1116                 adev->gfx.ce_fw = NULL;
1117                 release_firmware(adev->gfx.rlc_fw);
1118                 adev->gfx.rlc_fw = NULL;
1119                 release_firmware(adev->gfx.mec_fw);
1120                 adev->gfx.mec_fw = NULL;
1121                 release_firmware(adev->gfx.mec2_fw);
1122                 adev->gfx.mec2_fw = NULL;
1123         }
1124         return err;
1125 }
1126
/*
 * gfx_v8_0_get_csb_buffer - build the RLC clear-state PM4 stream
 * @adev:   amdgpu device
 * @buffer: destination (mapped clear-state BO); written as little-endian
 *
 * Emits PREAMBLE begin/end, CONTEXT_CONTROL, all SECT_CONTEXT extents
 * from adev->gfx.rlc.cs_data, the raster-config pair, and a final
 * CLEAR_STATE packet.  Bails out early (leaving a partial buffer) if a
 * non-context section is encountered.  The stream must fit in the size
 * reported by gfx_v8_0_get_csb_size().
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
                                    volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        /* one SET_CONTEXT_REG packet per extent in every context section */
        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                return;
                        }
                }
        }

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
                        PACKET3_SET_CONTEXT_REG_START);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}
1173
1174 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1175 {
1176         const __le32 *fw_data;
1177         volatile u32 *dst_ptr;
1178         int me, i, max_me = 4;
1179         u32 bo_offset = 0;
1180         u32 table_offset, table_size;
1181
1182         if (adev->asic_type == CHIP_CARRIZO)
1183                 max_me = 5;
1184
1185         /* write the cp table buffer */
1186         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1187         for (me = 0; me < max_me; me++) {
1188                 if (me == 0) {
1189                         const struct gfx_firmware_header_v1_0 *hdr =
1190                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1191                         fw_data = (const __le32 *)
1192                                 (adev->gfx.ce_fw->data +
1193                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1194                         table_offset = le32_to_cpu(hdr->jt_offset);
1195                         table_size = le32_to_cpu(hdr->jt_size);
1196                 } else if (me == 1) {
1197                         const struct gfx_firmware_header_v1_0 *hdr =
1198                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1199                         fw_data = (const __le32 *)
1200                                 (adev->gfx.pfp_fw->data +
1201                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1202                         table_offset = le32_to_cpu(hdr->jt_offset);
1203                         table_size = le32_to_cpu(hdr->jt_size);
1204                 } else if (me == 2) {
1205                         const struct gfx_firmware_header_v1_0 *hdr =
1206                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1207                         fw_data = (const __le32 *)
1208                                 (adev->gfx.me_fw->data +
1209                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1210                         table_offset = le32_to_cpu(hdr->jt_offset);
1211                         table_size = le32_to_cpu(hdr->jt_size);
1212                 } else if (me == 3) {
1213                         const struct gfx_firmware_header_v1_0 *hdr =
1214                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1215                         fw_data = (const __le32 *)
1216                                 (adev->gfx.mec_fw->data +
1217                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1218                         table_offset = le32_to_cpu(hdr->jt_offset);
1219                         table_size = le32_to_cpu(hdr->jt_size);
1220                 } else  if (me == 4) {
1221                         const struct gfx_firmware_header_v1_0 *hdr =
1222                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1223                         fw_data = (const __le32 *)
1224                                 (adev->gfx.mec2_fw->data +
1225                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1226                         table_offset = le32_to_cpu(hdr->jt_offset);
1227                         table_size = le32_to_cpu(hdr->jt_size);
1228                 }
1229
1230                 for (i = 0; i < table_size; i ++) {
1231                         dst_ptr[bo_offset + i] =
1232                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1233                 }
1234
1235                 bo_offset += table_size;
1236         }
1237 }
1238
/*
 * gfx_v8_0_rlc_fini - free the RLC clear-state and CP-table buffer objects
 *
 * Safe to call with either BO already NULL (amdgpu_bo_free_kernel
 * tolerates that), so it doubles as the error-unwind path of
 * gfx_v8_0_rlc_init().
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
        amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1244
/*
 * gfx_v8_0_rlc_init - allocate and populate RLC buffer objects
 *
 * Creates the clear-state BO in VRAM, fills it with the PM4 stream from
 * gfx_v8_0_get_csb_buffer(), and — on Carrizo/Stoney — creates the CP
 * jump-table BO and fills it via cz_init_cp_jump_table().  Both BOs are
 * unmapped/unreserved once written.
 *
 * Returns 0 on success or a negative error code from BO creation.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
        volatile u32 *dst_ptr;
        u32 dws;
        const struct cs_section_def *cs_data;
        int r;

        adev->gfx.rlc.cs_data = vi_cs_data;

        cs_data = adev->gfx.rlc.cs_data;

        if (cs_data) {
                /* clear state block */
                adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

                r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
                                              AMDGPU_GEM_DOMAIN_VRAM,
                                              &adev->gfx.rlc.clear_state_obj,
                                              &adev->gfx.rlc.clear_state_gpu_addr,
                                              (void **)&adev->gfx.rlc.cs_ptr);
                if (r) {
                        dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
                        gfx_v8_0_rlc_fini(adev);
                        return r;
                }

                /* set up the cs buffer */
                dst_ptr = adev->gfx.rlc.cs_ptr;
                gfx_v8_0_get_csb_buffer(adev, dst_ptr);
                amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
                amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
        }

        if ((adev->asic_type == CHIP_CARRIZO) ||
            (adev->asic_type == CHIP_STONEY)) {
                adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
                r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
                                              PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
                                              &adev->gfx.rlc.cp_table_obj,
                                              &adev->gfx.rlc.cp_table_gpu_addr,
                                              (void **)&adev->gfx.rlc.cp_table_ptr);
                if (r) {
                        dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
                        return r;
                }

                cz_init_cp_jump_table(adev);

                amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
                amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
        }

        return 0;
}
1299
/*
 * gfx_v8_0_mec_fini - free the MEC HPD EOP buffer object
 *
 * Counterpart of gfx_v8_0_mec_init().
 */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1304
1305 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1306 {
1307         int r;
1308         u32 *hpd;
1309         size_t mec_hpd_size;
1310
1311         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1312
1313         /* take ownership of the relevant compute queues */
1314         amdgpu_gfx_compute_queue_acquire(adev);
1315
1316         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1317
1318         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1319                                       AMDGPU_GEM_DOMAIN_GTT,
1320                                       &adev->gfx.mec.hpd_eop_obj,
1321                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1322                                       (void **)&hpd);
1323         if (r) {
1324                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1325                 return r;
1326         }
1327
1328         memset(hpd, 0, mec_hpd_size);
1329
1330         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1331         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1332
1333         return 0;
1334 }
1335
/*
 * Hand-assembled GCN machine code for the VGPR-touching compute shader
 * used by gfx_v8_0_do_edc_gpr_workarounds().  Opaque binary data — do
 * not reorder or reformat; presumably a run of v_mov_b32 writes ending
 * in a barrier and s_endpgm (TODO: confirm against the GCN3 ISA doc).
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1372
/*
 * Hand-assembled GCN machine code for the SGPR-touching compute shader
 * used by both SGPR dispatches in gfx_v8_0_do_edc_gpr_workarounds()
 * (the two passes differ only in the register state written before the
 * dispatch, not in the shader itself).  Opaque binary data — do not
 * reorder or reformat.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1397
/*
 * Register-offset/value pairs programmed via PACKET3_SET_SH_REG before
 * the VGPR dispatch in gfx_v8_0_do_edc_gpr_workarounds().  Consumed in
 * pairs (offset, value); keep the layout.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1417
/*
 * Register-offset/value pairs for the first SGPR dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Differs from sgpr2_init_regs
 * only in COMPUTE_STATIC_THREAD_MGMT_SE0 (0x0f here vs 0xf0) —
 * presumably selecting complementary CU subsets; confirm against the
 * register spec before changing.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1437
/*
 * Register-offset/value pairs for the second SGPR dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); the 0xf0 SE0 thread-management
 * mask complements the 0x0f mask in sgpr1_init_regs.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1457
/*
 * EDC SEC/DED error-counter registers across the gfx blocks (CPC/CPF/
 * CPG, DC, GDS, SPI, SQC/SQ, TCC, TCP, TD).  Read back one-by-one at
 * the end of gfx_v8_0_do_edc_gpr_workarounds() — the read presumably
 * clears each counter (read-to-clear); confirm against the register
 * spec before reordering or removing entries.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1486
/*
 * gfx_v8_0_do_edc_gpr_workarounds - prime GPR EDC state (Carrizo only)
 *
 * Builds one indirect buffer containing three compute dispatches — a
 * VGPR pass and two SGPR passes (the SGPR passes share one shader but
 * use complementary COMPUTE_STATIC_THREAD_MGMT_SE0 masks) — submits it
 * on compute ring 0 and waits for completion, then enables DED/FED
 * reporting in GB_EDC_MODE and reads back every SEC/DED counter
 * register to clear it.
 *
 * Returns 0 on success (or when skipped because the ASIC is not
 * Carrizo or the compute ring is not ready), otherwise the negative
 * error from IB submission or the fence wait.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save GB_EDC_MODE and disable EDC while the shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per section: (regs/2) SET_SH_REG writes of 3 dwords each,
	 * + 4 dwords PGM address, + 5 dwords dispatch, + 2 dwords
	 * CS partial flush; x4 converts dwords to bytes.
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders into the tail of the same IB */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 — reuses the SGPR shader at sgpr_offset */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* restore GB_EDC_MODE with DED reporting and FED propagation on */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	/* NOTE(review): the trailing "| 1" sets bit 0 unconditionally on
	 * top of the DIS_EDC field clear — confirm against the register
	 * spec which field bit 0 belongs to.
	 */
	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	/* dma_fence_put() tolerates the NULL fence left when submission
	 * failed before a fence was produced.
	 */
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1649
/*
 * gfx_v8_0_gpu_early_init - derive the per-ASIC gfx configuration
 *
 * Fills adev->gfx.config with the shader-engine/pipe/CU/cache limits
 * for the detected VI ASIC (Polaris parts query atombios instead of
 * using hard-coded tables), determines the DRAM row size from the
 * memory-controller configuration (via the fuse DIMM address maps on
 * APUs, MC_ARB_RAMCFG otherwise), and patches ROW_SIZE into the chosen
 * golden GB_ADDR_CONFIG value.
 *
 * Returns 0 on success, or the error from
 * amdgpu_atombios_get_gfx_info() on Polaris parts.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* SE/pipe/CU counts come from the vbios on Polaris */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but never used below —
	 * confirm the MMIO read has no required side effect before
	 * removing it.
	 */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1852
1853 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1854                                         int mec, int pipe, int queue)
1855 {
1856         int r;
1857         unsigned irq_type;
1858         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1859
1860         ring = &adev->gfx.compute_ring[ring_id];
1861
1862         /* mec0 is me1 */
1863         ring->me = mec + 1;
1864         ring->pipe = pipe;
1865         ring->queue = queue;
1866
1867         ring->ring_obj = NULL;
1868         ring->use_doorbell = true;
1869         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1870         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1871                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1872         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1873
1874         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1875                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1876                 + ring->pipe;
1877
1878         /* type-2 packets are deprecated on MEC, use type-3 instead */
1879         r = amdgpu_ring_init(adev, ring, 1024,
1880                         &adev->gfx.eop_irq, irq_type);
1881         if (r)
1882                 return r;
1883
1884
1885         return 0;
1886 }
1887
/*
 * gfx_v8_0_sw_init - software init for the GFX8 IP block
 *
 * Determines the MEC topology for the ASIC, registers the KIQ/EOP/
 * privileged-register/privileged-instruction interrupt sources, loads
 * microcode, allocates the RLC and MEC buffer objects, creates the gfx
 * and compute rings (plus KIQ ring and per-queue MQDs), reserves the
 * GDS/GWS/OA partitions, and finally derives the gfx configuration via
 * gfx_v8_0_gpu_early_init().
 *
 * Returns 0 on success or a negative error code from the first
 * failing step (earlier allocations are not unwound here; cleanup is
 * handled by the sw_fini path).
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs varies per ASIC */
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event
	 * (178/181/184/185 are legacy IH source ids — presumably the
	 * VISLANDS30_IV_SRCID_* values; confirm against vid.h)
	 */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				/* only create rings for queues this driver owns */
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2038
/*
 * gfx_v8_0_sw_fini - tear down GFX IP block software state
 * @handle: amdgpu_device pointer (passed as void * by the IP block framework)
 *
 * Releases, roughly in reverse order of their creation in gfx_v8_0_sw_init(),
 * the buffer objects, rings and firmware state owned by the GFX block.
 * The teardown order matters: rings are finalized before the MQD/KIQ state
 * that backs them, and the MEC/RLC BOs and microcode are freed last.
 *
 * Returns 0 (always succeeds).
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free the GDS, GWS and OA BOs reserved for gfx in sw_init */
	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	/* tear down all gfx and compute rings */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	/* compute MQD backing memory, then the KIQ ring/irq and KIQ BOs */
	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	/* MEC and RLC BOs, then release the loaded firmware images */
	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2063
2064 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2065 {
2066         uint32_t *modearray, *mod2array;
2067         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2068         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2069         u32 reg_offset;
2070
2071         modearray = adev->gfx.config.tile_mode_array;
2072         mod2array = adev->gfx.config.macrotile_mode_array;
2073
2074         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2075                 modearray[reg_offset] = 0;
2076
2077         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2078                 mod2array[reg_offset] = 0;
2079
2080         switch (adev->asic_type) {
2081         case CHIP_TOPAZ:
2082                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2083                                 PIPE_CONFIG(ADDR_SURF_P2) |
2084                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2085                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2086                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2087                                 PIPE_CONFIG(ADDR_SURF_P2) |
2088                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2089                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2090                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2091                                 PIPE_CONFIG(ADDR_SURF_P2) |
2092                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2093                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2094                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2095                                 PIPE_CONFIG(ADDR_SURF_P2) |
2096                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2097                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2098                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099                                 PIPE_CONFIG(ADDR_SURF_P2) |
2100                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2101                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2102                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2103                                 PIPE_CONFIG(ADDR_SURF_P2) |
2104                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2105                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2106                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2107                                 PIPE_CONFIG(ADDR_SURF_P2) |
2108                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2109                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2110                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2111                                 PIPE_CONFIG(ADDR_SURF_P2));
2112                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2113                                 PIPE_CONFIG(ADDR_SURF_P2) |
2114                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2115                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2116                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2117                                  PIPE_CONFIG(ADDR_SURF_P2) |
2118                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2119                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2120                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2121                                  PIPE_CONFIG(ADDR_SURF_P2) |
2122                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2123                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2124                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2125                                  PIPE_CONFIG(ADDR_SURF_P2) |
2126                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2127                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2128                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2129                                  PIPE_CONFIG(ADDR_SURF_P2) |
2130                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2131                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2132                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2133                                  PIPE_CONFIG(ADDR_SURF_P2) |
2134                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2135                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2136                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2137                                  PIPE_CONFIG(ADDR_SURF_P2) |
2138                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2139                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2140                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2141                                  PIPE_CONFIG(ADDR_SURF_P2) |
2142                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2143                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2144                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2145                                  PIPE_CONFIG(ADDR_SURF_P2) |
2146                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2147                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2148                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2149                                  PIPE_CONFIG(ADDR_SURF_P2) |
2150                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2151                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2152                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2153                                  PIPE_CONFIG(ADDR_SURF_P2) |
2154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2156                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2157                                  PIPE_CONFIG(ADDR_SURF_P2) |
2158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2160                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2161                                  PIPE_CONFIG(ADDR_SURF_P2) |
2162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2164                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2165                                  PIPE_CONFIG(ADDR_SURF_P2) |
2166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2168                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2169                                  PIPE_CONFIG(ADDR_SURF_P2) |
2170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2172                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2173                                  PIPE_CONFIG(ADDR_SURF_P2) |
2174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2176                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2177                                  PIPE_CONFIG(ADDR_SURF_P2) |
2178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2180                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2181                                  PIPE_CONFIG(ADDR_SURF_P2) |
2182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2184
2185                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2186                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2187                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2188                                 NUM_BANKS(ADDR_SURF_8_BANK));
2189                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2190                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2191                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2192                                 NUM_BANKS(ADDR_SURF_8_BANK));
2193                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2194                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2195                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2196                                 NUM_BANKS(ADDR_SURF_8_BANK));
2197                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2199                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2200                                 NUM_BANKS(ADDR_SURF_8_BANK));
2201                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2202                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2203                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2204                                 NUM_BANKS(ADDR_SURF_8_BANK));
2205                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2206                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2207                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2208                                 NUM_BANKS(ADDR_SURF_8_BANK));
2209                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2210                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2211                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2212                                 NUM_BANKS(ADDR_SURF_8_BANK));
2213                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2214                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2215                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2216                                 NUM_BANKS(ADDR_SURF_16_BANK));
2217                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2218                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2219                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2220                                 NUM_BANKS(ADDR_SURF_16_BANK));
2221                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2222                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2224                                  NUM_BANKS(ADDR_SURF_16_BANK));
2225                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2226                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2227                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2228                                  NUM_BANKS(ADDR_SURF_16_BANK));
2229                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2230                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2231                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2232                                  NUM_BANKS(ADDR_SURF_16_BANK));
2233                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2235                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2236                                  NUM_BANKS(ADDR_SURF_16_BANK));
2237                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2239                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240                                  NUM_BANKS(ADDR_SURF_8_BANK));
2241
2242                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2243                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2244                             reg_offset != 23)
2245                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2246
2247                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2248                         if (reg_offset != 7)
2249                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2250
2251                 break;
2252         case CHIP_FIJI:
2253                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2255                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2256                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2257                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2259                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2260                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2261                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2262                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2263                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2264                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2265                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2266                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2267                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2269                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2271                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2272                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2273                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2274                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2275                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2276                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2277                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2278                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2279                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2280                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2281                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2282                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2283                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2284                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2285                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2286                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2287                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2288                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2289                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2290                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2291                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2295                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2296                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2299                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2300                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2303                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2304                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2307                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2311                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2312                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2315                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2316                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2319                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2320                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2323                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2324                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2327                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2328                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2331                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2332                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2335                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2336                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2339                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2340                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2343                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2344                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2347                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2348                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2351                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2352                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2355                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2356                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2359                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2360                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2364                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2368                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2371                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2372                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2375
2376                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2378                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2379                                 NUM_BANKS(ADDR_SURF_8_BANK));
2380                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2382                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2383                                 NUM_BANKS(ADDR_SURF_8_BANK));
2384                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2386                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2387                                 NUM_BANKS(ADDR_SURF_8_BANK));
2388                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2390                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2391                                 NUM_BANKS(ADDR_SURF_8_BANK));
2392                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2394                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2395                                 NUM_BANKS(ADDR_SURF_8_BANK));
2396                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2398                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2399                                 NUM_BANKS(ADDR_SURF_8_BANK));
2400                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2402                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2403                                 NUM_BANKS(ADDR_SURF_8_BANK));
2404                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2406                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2407                                 NUM_BANKS(ADDR_SURF_8_BANK));
2408                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2410                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2411                                 NUM_BANKS(ADDR_SURF_8_BANK));
2412                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2414                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2415                                  NUM_BANKS(ADDR_SURF_8_BANK));
2416                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2418                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2419                                  NUM_BANKS(ADDR_SURF_8_BANK));
2420                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2422                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2423                                  NUM_BANKS(ADDR_SURF_8_BANK));
2424                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2426                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2427                                  NUM_BANKS(ADDR_SURF_8_BANK));
2428                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2430                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2431                                  NUM_BANKS(ADDR_SURF_4_BANK));
2432
2433                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2434                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2435
2436                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2437                         if (reg_offset != 7)
2438                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2439
2440                 break;
	case CHIP_TONGA:
		/*
		 * Tonga tiling configuration: 8-pipe layout
		 * (ADDR_SURF_P8_32x32_16x16) for most modes, with the PRT
		 * variants at indices 7/12/17/23/30 dropping to P4_16x16.
		 *
		 * modearray[] fills GB_TILE_MODE0..30:
		 *   0-7   depth micro-tiling, tile split 64B..2KB
		 *   8     linear aligned
		 *   9-12  display micro-tiling
		 *   13-18 thin micro-tiling
		 *   19-26 thick/xthick micro-tiling
		 *   27-30 rotated micro-tiling
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * mod2array[] fills GB_MACROTILE_MODE0..14 (bank width/height,
		 * macro tile aspect, bank count).  Index 7 is intentionally
		 * left uninitialized and skipped by the write loop below.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tables into the hardware registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Macrotile index 7 is not programmed (never initialized). */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/*
		 * Polaris11/Polaris12 tiling configuration: 4-pipe layout —
		 * every mode uses PIPE_CONFIG(ADDR_SURF_P4_16x16).
		 *
		 * modearray[] fills GB_TILE_MODE0..30; same index layout as
		 * the other ASIC cases in this switch:
		 *   0-7   depth micro-tiling, tile split 64B..2KB
		 *   8     linear aligned
		 *   9-12  display micro-tiling
		 *   13-18 thin micro-tiling
		 *   19-26 thick/xthick micro-tiling
		 *   27-30 rotated micro-tiling
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * mod2array[] fills GB_MACROTILE_MODE0..14 (bank width/height,
		 * macro tile aspect, bank count).  Index 7 is intentionally
		 * left uninitialized and skipped by the write loop below.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tables into the hardware registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Macrotile index 7 is not programmed (never initialized). */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2833         case CHIP_POLARIS10:
2834                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2835                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2836                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2837                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2838                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2840                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2841                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2842                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2843                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2844                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2845                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2846                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2847                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2848                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2849                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2850                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2851                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2852                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2853                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2855                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2856                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2857                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2858                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2859                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2860                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2861                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2862                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2863                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2864                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2866                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2867                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2868                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2869                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2870                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2871                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2872                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2874                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2875                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2876                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2877                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2878                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2880                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2881                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2882                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2883                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2884                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2885                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2886                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2887                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2888                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2891                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2892                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2893                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2895                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2896                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2897                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2899                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2900                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2901                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2903                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2904                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2905                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2907                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2908                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2909                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2911                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2912                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2913                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2915                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2916                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2917                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2919                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2920                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2921                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2923                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2924                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2925                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2927                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2928                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2929                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2930                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2931                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2932                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2933                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2934                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2935                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2936                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2937                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2939                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2940                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2943                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2945                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2946                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2947                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2948                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2949                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2950                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2951                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2952                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2953                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2954                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2955                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2956
2957                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2958                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2959                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2960                                 NUM_BANKS(ADDR_SURF_16_BANK));
2961
2962                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2963                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2964                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965                                 NUM_BANKS(ADDR_SURF_16_BANK));
2966
2967                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2969                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970                                 NUM_BANKS(ADDR_SURF_16_BANK));
2971
2972                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2974                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975                                 NUM_BANKS(ADDR_SURF_16_BANK));
2976
2977                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2978                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2979                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2980                                 NUM_BANKS(ADDR_SURF_16_BANK));
2981
2982                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2983                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2984                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2985                                 NUM_BANKS(ADDR_SURF_16_BANK));
2986
2987                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2990                                 NUM_BANKS(ADDR_SURF_16_BANK));
2991
2992                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2994                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995                                 NUM_BANKS(ADDR_SURF_16_BANK));
2996
2997                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2999                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3000                                 NUM_BANKS(ADDR_SURF_16_BANK));
3001
3002                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3003                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3004                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3005                                 NUM_BANKS(ADDR_SURF_16_BANK));
3006
3007                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3008                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3009                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3010                                 NUM_BANKS(ADDR_SURF_16_BANK));
3011
3012                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3015                                 NUM_BANKS(ADDR_SURF_8_BANK));
3016
3017                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3018                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3019                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3020                                 NUM_BANKS(ADDR_SURF_4_BANK));
3021
3022                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3023                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3024                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3025                                 NUM_BANKS(ADDR_SURF_4_BANK));
3026
3027                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3028                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3029
3030                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3031                         if (reg_offset != 7)
3032                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3033
3034                 break;
3035         case CHIP_STONEY:
3036                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3037                                 PIPE_CONFIG(ADDR_SURF_P2) |
3038                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3039                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3040                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3041                                 PIPE_CONFIG(ADDR_SURF_P2) |
3042                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3043                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3044                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3045                                 PIPE_CONFIG(ADDR_SURF_P2) |
3046                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3047                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3048                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3049                                 PIPE_CONFIG(ADDR_SURF_P2) |
3050                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3051                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3052                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3053                                 PIPE_CONFIG(ADDR_SURF_P2) |
3054                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3055                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3056                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3057                                 PIPE_CONFIG(ADDR_SURF_P2) |
3058                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3059                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3060                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3061                                 PIPE_CONFIG(ADDR_SURF_P2) |
3062                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3063                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3064                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3065                                 PIPE_CONFIG(ADDR_SURF_P2));
3066                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3067                                 PIPE_CONFIG(ADDR_SURF_P2) |
3068                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3069                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3070                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3071                                  PIPE_CONFIG(ADDR_SURF_P2) |
3072                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3073                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3074                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3075                                  PIPE_CONFIG(ADDR_SURF_P2) |
3076                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3077                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3078                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3079                                  PIPE_CONFIG(ADDR_SURF_P2) |
3080                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3081                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3082                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3083                                  PIPE_CONFIG(ADDR_SURF_P2) |
3084                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3085                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3086                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3087                                  PIPE_CONFIG(ADDR_SURF_P2) |
3088                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3089                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3090                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3091                                  PIPE_CONFIG(ADDR_SURF_P2) |
3092                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3093                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3094                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3095                                  PIPE_CONFIG(ADDR_SURF_P2) |
3096                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3097                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3098                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3099                                  PIPE_CONFIG(ADDR_SURF_P2) |
3100                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3101                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3102                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3103                                  PIPE_CONFIG(ADDR_SURF_P2) |
3104                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3105                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3106                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3107                                  PIPE_CONFIG(ADDR_SURF_P2) |
3108                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3109                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3110                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3111                                  PIPE_CONFIG(ADDR_SURF_P2) |
3112                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3113                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3114                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3115                                  PIPE_CONFIG(ADDR_SURF_P2) |
3116                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3117                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3118                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3119                                  PIPE_CONFIG(ADDR_SURF_P2) |
3120                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3121                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3122                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3123                                  PIPE_CONFIG(ADDR_SURF_P2) |
3124                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3125                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3126                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3127                                  PIPE_CONFIG(ADDR_SURF_P2) |
3128                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3129                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3130                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3131                                  PIPE_CONFIG(ADDR_SURF_P2) |
3132                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3133                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3134                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3135                                  PIPE_CONFIG(ADDR_SURF_P2) |
3136                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3137                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3138
3139                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3140                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3141                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3142                                 NUM_BANKS(ADDR_SURF_8_BANK));
3143                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3145                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3146                                 NUM_BANKS(ADDR_SURF_8_BANK));
3147                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150                                 NUM_BANKS(ADDR_SURF_8_BANK));
3151                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3152                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3153                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3154                                 NUM_BANKS(ADDR_SURF_8_BANK));
3155                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3156                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3157                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3158                                 NUM_BANKS(ADDR_SURF_8_BANK));
3159                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3162                                 NUM_BANKS(ADDR_SURF_8_BANK));
3163                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3164                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3165                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3166                                 NUM_BANKS(ADDR_SURF_8_BANK));
3167                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3168                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3169                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3170                                 NUM_BANKS(ADDR_SURF_16_BANK));
3171                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3172                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3173                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3174                                 NUM_BANKS(ADDR_SURF_16_BANK));
3175                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3176                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3177                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3178                                  NUM_BANKS(ADDR_SURF_16_BANK));
3179                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3180                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3181                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3182                                  NUM_BANKS(ADDR_SURF_16_BANK));
3183                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3185                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3186                                  NUM_BANKS(ADDR_SURF_16_BANK));
3187                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3188                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3189                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3190                                  NUM_BANKS(ADDR_SURF_16_BANK));
3191                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3192                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3193                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3194                                  NUM_BANKS(ADDR_SURF_8_BANK));
3195
3196                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3197                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3198                             reg_offset != 23)
3199                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3200
3201                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3202                         if (reg_offset != 7)
3203                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3204
3205                 break;
3206         default:
3207                 dev_warn(adev->dev,
3208                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3209                          adev->asic_type);
3210
3211         case CHIP_CARRIZO:
3212                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3213                                 PIPE_CONFIG(ADDR_SURF_P2) |
3214                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3215                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3216                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3217                                 PIPE_CONFIG(ADDR_SURF_P2) |
3218                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3219                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3220                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3221                                 PIPE_CONFIG(ADDR_SURF_P2) |
3222                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3223                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3224                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3225                                 PIPE_CONFIG(ADDR_SURF_P2) |
3226                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3227                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3228                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3229                                 PIPE_CONFIG(ADDR_SURF_P2) |
3230                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3231                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3232                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3233                                 PIPE_CONFIG(ADDR_SURF_P2) |
3234                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3235                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3236                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3237                                 PIPE_CONFIG(ADDR_SURF_P2) |
3238                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3239                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3240                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3241                                 PIPE_CONFIG(ADDR_SURF_P2));
3242                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3243                                 PIPE_CONFIG(ADDR_SURF_P2) |
3244                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3246                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3247                                  PIPE_CONFIG(ADDR_SURF_P2) |
3248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3250                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3251                                  PIPE_CONFIG(ADDR_SURF_P2) |
3252                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3253                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3254                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3255                                  PIPE_CONFIG(ADDR_SURF_P2) |
3256                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3257                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259                                  PIPE_CONFIG(ADDR_SURF_P2) |
3260                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3261                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3262                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3263                                  PIPE_CONFIG(ADDR_SURF_P2) |
3264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3266                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3267                                  PIPE_CONFIG(ADDR_SURF_P2) |
3268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3270                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3271                                  PIPE_CONFIG(ADDR_SURF_P2) |
3272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3274                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3275                                  PIPE_CONFIG(ADDR_SURF_P2) |
3276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3278                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3279                                  PIPE_CONFIG(ADDR_SURF_P2) |
3280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3282                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3283                                  PIPE_CONFIG(ADDR_SURF_P2) |
3284                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3285                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3286                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3287                                  PIPE_CONFIG(ADDR_SURF_P2) |
3288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3290                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3291                                  PIPE_CONFIG(ADDR_SURF_P2) |
3292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3294                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3295                                  PIPE_CONFIG(ADDR_SURF_P2) |
3296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3298                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3299                                  PIPE_CONFIG(ADDR_SURF_P2) |
3300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3302                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3303                                  PIPE_CONFIG(ADDR_SURF_P2) |
3304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3306                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3307                                  PIPE_CONFIG(ADDR_SURF_P2) |
3308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3310                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3311                                  PIPE_CONFIG(ADDR_SURF_P2) |
3312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3314
3315                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3316                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3317                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3318                                 NUM_BANKS(ADDR_SURF_8_BANK));
3319                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3320                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3321                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3322                                 NUM_BANKS(ADDR_SURF_8_BANK));
3323                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3325                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3326                                 NUM_BANKS(ADDR_SURF_8_BANK));
3327                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3328                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3329                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3330                                 NUM_BANKS(ADDR_SURF_8_BANK));
3331                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3332                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3333                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3334                                 NUM_BANKS(ADDR_SURF_8_BANK));
3335                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3336                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3337                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3338                                 NUM_BANKS(ADDR_SURF_8_BANK));
3339                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3340                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3341                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3342                                 NUM_BANKS(ADDR_SURF_8_BANK));
3343                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3344                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3345                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3346                                 NUM_BANKS(ADDR_SURF_16_BANK));
3347                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3348                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3349                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3350                                 NUM_BANKS(ADDR_SURF_16_BANK));
3351                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3352                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3353                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3354                                  NUM_BANKS(ADDR_SURF_16_BANK));
3355                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3356                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3357                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3358                                  NUM_BANKS(ADDR_SURF_16_BANK));
3359                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3360                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3361                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3362                                  NUM_BANKS(ADDR_SURF_16_BANK));
3363                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3364                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3365                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3366                                  NUM_BANKS(ADDR_SURF_16_BANK));
3367                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3368                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3369                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3370                                  NUM_BANKS(ADDR_SURF_8_BANK));
3371
3372                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3373                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3374                             reg_offset != 23)
3375                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3376
3377                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3378                         if (reg_offset != 7)
3379                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3380
3381                 break;
3382         }
3383 }
3384
3385 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3386                                   u32 se_num, u32 sh_num, u32 instance)
3387 {
3388         u32 data;
3389
3390         if (instance == 0xffffffff)
3391                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3392         else
3393                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3394
3395         if (se_num == 0xffffffff)
3396                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3397         else
3398                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3399
3400         if (sh_num == 0xffffffff)
3401                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3402         else
3403                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3404
3405         WREG32(mmGRBM_GFX_INDEX, data);
3406 }
3407
3408 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3409 {
3410         u32 data, mask;
3411
3412         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3413                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3414
3415         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3416
3417         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3418                                          adev->gfx.config.max_sh_per_se);
3419
3420         return (~data) & mask;
3421 }
3422
3423 static void
3424 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3425 {
3426         switch (adev->asic_type) {
3427         case CHIP_FIJI:
3428                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3429                           RB_XSEL2(1) | PKR_MAP(2) |
3430                           PKR_XSEL(1) | PKR_YSEL(1) |
3431                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3432                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3433                            SE_PAIR_YSEL(2);
3434                 break;
3435         case CHIP_TONGA:
3436         case CHIP_POLARIS10:
3437                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3438                           SE_XSEL(1) | SE_YSEL(1);
3439                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3440                            SE_PAIR_YSEL(2);
3441                 break;
3442         case CHIP_TOPAZ:
3443         case CHIP_CARRIZO:
3444                 *rconf |= RB_MAP_PKR0(2);
3445                 *rconf1 |= 0x0;
3446                 break;
3447         case CHIP_POLARIS11:
3448         case CHIP_POLARIS12:
3449                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3450                           SE_XSEL(1) | SE_YSEL(1);
3451                 *rconf1 |= 0x0;
3452                 break;
3453         case CHIP_STONEY:
3454                 *rconf |= 0x0;
3455                 *rconf1 |= 0x0;
3456                 break;
3457         default:
3458                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3459                 break;
3460         }
3461 }
3462
/*
 * gfx_v8_0_write_harvested_raster_configs - remap raster config around
 * harvested render backends
 *
 * @adev: amdgpu_device pointer
 * @raster_config / @raster_config_1: the ideal (unharvested) config values
 * @rb_mask: bitmap of render backends that are actually present
 * @num_rb: total number of possible render backends
 *
 * When some RBs are fused off, the default SE/PKR/RB mappings may point
 * at dead units.  This walks each shader engine and, at every level of
 * the hierarchy (SE pair, SE, packer, RB), redirects the mapping toward
 * the surviving half whenever the other half is fully disabled, then
 * writes the per-SE result to PA_SC_RASTER_CONFIG(_1).
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* slice the global RB bitmap into one contiguous mask per SE */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* the remap logic below only handles these topologies */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if one whole SE pair is dead, point SE_PAIR_MAP at the live pair */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx selects the SE pair this SE belongs to */
		int idx = (se / 2) * 2;

		/* if one SE of the pair is dead, steer SE_MAP to the live one */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* same idea one level down, at packer granularity */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* and finally at RB granularity within each packer */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* second packer's RB pair */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3571
/*
 * gfx_v8_0_setup_rb - detect active render backends and program raster config
 *
 * @adev: amdgpu_device pointer
 *
 * Scans every SE/SH to build the bitmap of usable render backends,
 * records it (and the RB count) in adev->gfx.config, then programs
 * PA_SC_RASTER_CONFIG(_1) -- either broadcast with the ideal values when
 * nothing is harvested, or per-SE via the harvested path.  Finally caches
 * the per-SE/SH register values for userspace queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* collect each SH's active-RB bitmap into one global bitmap */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* back to broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* no RBs harvested (or none at all): broadcast the ideal config;
	 * otherwise compute a per-SE config around the missing RBs */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3628
/**
 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM_CONFIG/SH_MEM_BASES apertures for the
 * compute VMIDs (8..15).
 */
/* base value written (twice) into SH_MEM_BASES for every compute VMID */
#define DEFAULT_SH_MEM_BASES    (0x6000)
/* compute queues own VMIDs 8..15 (LAST is exclusive); 0..7 belong to gfx */
#define FIRST_COMPUTE_VMID      (8)
#define LAST_COMPUTE_VMID       (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* same value in both 16-bit halves of SH_MEM_BASES -- presumably the
	 * shared and private base fields; confirm against the sh_mask header */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* HSA-style 64-bit addressing, unaligned access allowed,
	 * MTYPE_CC default, private ATC enabled */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		/* srbm select routes the writes below to this VMID's bank */
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base(1) > limit(0) -- looks like it leaves the
		 * aperture unused; verify against SH_MEM_APE1 docs */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* restore VMID 0 selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3673
3674 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3675 {
3676         switch (adev->asic_type) {
3677         default:
3678                 adev->gfx.config.double_offchip_lds_buf = 1;
3679                 break;
3680         case CHIP_CARRIZO:
3681         case CHIP_STONEY:
3682                 adev->gfx.config.double_offchip_lds_buf = 0;
3683                 break;
3684         }
3685 }
3686
/*
 * gfx_v8_0_gpu_init - one-time GFX block initialization
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the global address config, tiling tables, render backends and
 * CU info, then sets up the SH_MEM apertures for every VMID and the
 * scan-converter FIFO sizes / SPI arbitration.  Ordering matters: the
 * tiling/RB/CU setup must precede the broadcast register writes at the end.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	/* generous GRBM read timeout */
	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	/* mirror gb_addr_config into the blocks that consume it */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	/* program SH_MEM_* per VMID; VMID 0 (kernel) differs from 1..n */
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached default mtype, zero bases */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* user VMIDs: non-coherent default mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			/* top 16 bits of the shared aperture start address */
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* APE1 base(1) > limit(0) -- aperture apparently unused;
		 * verify against SH_MEM_APE1 documentation */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* value 2 written to each PIPE_ORDER_TS field -- presumably equal
	 * arbitration priority for all four; confirm against SPI docs */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3768
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes links to go idle
 *
 * @adev: amdgpu_device pointer
 *
 * Polls the per-CU master-busy register for every SE/SH (with the GRBM
 * index steered at each in turn), then polls the non-CU masters.  Each
 * poll gives up after adev->usec_timeout microseconds without reporting
 * an error; callers treat this as best-effort quiescing.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* busy-wait until this SH's CU masters go idle */
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast before dropping the lock */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* then wait for the non-CU masters (SE/GC/TC0/TC1) */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3798
3799 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3800                                                bool enable)
3801 {
3802         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3803
3804         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3805         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3806         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3807         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3808
3809         WREG32(mmCP_INT_CNTL_RING0, tmp);
3810 }
3811
3812 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3813 {
3814         /* csib */
3815         WREG32(mmRLC_CSIB_ADDR_HI,
3816                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3817         WREG32(mmRLC_CSIB_ADDR_LO,
3818                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3819         WREG32(mmRLC_CSIB_LENGTH,
3820                         adev->gfx.rlc.clear_state_size);
3821 }
3822
/*
 * gfx_v8_0_parse_ind_reg_list - parse the RLC indirect register list
 *
 * @register_list_format: firmware list; entries are word triples whose third
 *                        word is an index id, separated by 0xFFFFFFFF markers.
 *                        Rewritten in place: each index id is replaced by its
 *                        position in @unique_indices.
 * @ind_offset: word offset at which parsing starts
 * @list_size: total number of words in the list
 * @unique_indices / @indices_count / @max_indices: out array of distinct
 *                        index ids, its running length and its capacity
 * @ind_start_offsets / @offset_count / @max_offset: out array of entry start
 *                        offsets, its running length and its capacity
 *
 * Note: the capacity checks run *before* each write so the final array
 * slot (index max-1) is usable; the previous post-increment BUG_ON fired
 * one entry too early.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			/* bounds check before the write, not after */
			BUG_ON(*offset_count >= max_offset);
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
		}

		/* 0xFFFFFFFF terminates the current entry */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip to the third word of the triple: the index id */
		ind_offset += 2;

		/* look for a matching index already collected */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append it (checking capacity first) */
		if (indices >= *indices_count) {
			BUG_ON(*indices_count >= max_indices);
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
		}

		/* replace the id with its position in unique_indices */
		register_list_format[ind_offset] = indices;
	}
}
3872
/*
 * gfx_v8_0_init_save_restore_list - upload the RLC save/restore lists
 *
 * @adev: amdgpu_device pointer
 *
 * Copies the firmware register-list-format blob, parses it into unique
 * indices and per-entry start offsets, then programs the RLC ARAM with
 * the direct restore list, the GPM scratch with the (rewritten) indirect
 * list plus metadata, and the SRM index control registers with the
 * collected unique indices.
 *
 * Returns 0 on success or -ENOMEM if the scratch copy cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parsing rewrites the list in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	/* direct restore list goes into ARAM (address auto-increments) */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* restore-list size is recorded in units of register pairs (dwords/2) */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			/* low 18 bits -> ADDR reg, bits 20+ -> DATA reg;
			 * the 2-bit gap looks intentional but verify
			 * against the RLC SRM register spec */
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
3936
/* Turn on the RLC save/restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3941
/*
 * gfx_v8_0_init_power_gating - program power-gating timing parameters
 *
 * @adev: amdgpu_device pointer
 *
 * Sets the WPTR idle poll count, the four RLC power up/down/propagate/
 * mem-sleep delays, the serdes command delay and the GFX idle threshold
 * for GRBM register save.  Values are fixed magic delays -- presumably
 * from AMD's hardware tuning; no per-ASIC variation here.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
3958
3959 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3960                                                 bool enable)
3961 {
3962         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3963 }
3964
3965 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3966                                                   bool enable)
3967 {
3968         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3969 }
3970
3971 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3972 {
3973         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
3974 }
3975
3976 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3977 {
3978         if ((adev->asic_type == CHIP_CARRIZO) ||
3979             (adev->asic_type == CHIP_STONEY)) {
3980                 gfx_v8_0_init_csb(adev);
3981                 gfx_v8_0_init_save_restore_list(adev);
3982                 gfx_v8_0_enable_save_restore_machine(adev);
3983                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3984                 gfx_v8_0_init_power_gating(adev);
3985                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3986         } else if ((adev->asic_type == CHIP_POLARIS11) ||
3987                    (adev->asic_type == CHIP_POLARIS12)) {
3988                 gfx_v8_0_init_csb(adev);
3989                 gfx_v8_0_init_save_restore_list(adev);
3990                 gfx_v8_0_enable_save_restore_machine(adev);
3991                 gfx_v8_0_init_power_gating(adev);
3992         }
3993
3994 }
3995
/*
 * gfx_v8_0_rlc_stop - halt the RLC
 *
 * Disables the RLC F32 core, masks the GUI idle interrupts and waits
 * for the serdes links to drain before returning.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4003
/*
 * gfx_v8_0_rlc_reset - pulse the RLC soft reset
 *
 * Asserts then deasserts GRBM_SOFT_RESET.SOFT_RESET_RLC with a 50us
 * settle delay on each edge.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4012
/*
 * gfx_v8_0_rlc_start - enable the RLC F32 core
 *
 * On discrete parts the GUI idle interrupts are re-enabled here; on APUs
 * that happens later, after the CP is initialized (see comment below).
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	/* let the RLC come up before callers touch it */
	udelay(50);
}
4023
4024 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4025 {
4026         const struct rlc_firmware_header_v2_0 *hdr;
4027         const __le32 *fw_data;
4028         unsigned i, fw_size;
4029
4030         if (!adev->gfx.rlc_fw)
4031                 return -EINVAL;
4032
4033         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4034         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4035
4036         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4037                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4038         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4039
4040         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4041         for (i = 0; i < fw_size; i++)
4042                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4043         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4044
4045         return 0;
4046 }
4047
/*
 * Full RLC bring-up: stop the RLC, disable coarse/fine clock gating,
 * clear power gating, soft-reset the RLC, reprogram the PG setup,
 * (re)load the RLC microcode unless the SMU handled it, and restart
 * the RLC.
 *
 * Returns 0 on success or a negative error code on firmware-load
 * failure.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
        int r;
        u32 tmp;

        gfx_v8_0_rlc_stop(adev);

        /* disable CG */
        tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
        tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
        WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
        if (adev->asic_type == CHIP_POLARIS11 ||
            adev->asic_type == CHIP_POLARIS10 ||
            adev->asic_type == CHIP_POLARIS12) {
                /* Polaris also has a 3D CGCG/CGLS control register; clear
                 * its two low enable bits as well. */
                tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
                tmp &= ~0x3;
                WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
        }

        /* disable PG */
        WREG32(mmRLC_PG_CNTL, 0);

        gfx_v8_0_rlc_reset(adev);
        gfx_v8_0_init_pg(adev);

        if (!adev->pp_enabled) {
                if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
                        /* legacy rlc firmware loading */
                        r = gfx_v8_0_rlc_load_microcode(adev);
                        if (r)
                                return r;
                } else {
                        /* SMU-managed load: just verify that the RLC_G
                         * image finished loading. */
                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
                                                        AMDGPU_UCODE_ID_RLC_G);
                        if (r)
                                return -EINVAL;
                }
        }

        gfx_v8_0_rlc_start(adev);

        return 0;
}
4092
4093 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4094 {
4095         int i;
4096         u32 tmp = RREG32(mmCP_ME_CNTL);
4097
4098         if (enable) {
4099                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4100                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4101                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4102         } else {
4103                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4104                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4105                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4106                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4107                         adev->gfx.gfx_ring[i].ready = false;
4108         }
4109         WREG32(mmCP_ME_CNTL, tmp);
4110         udelay(50);
4111 }
4112
4113 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4114 {
4115         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4116         const struct gfx_firmware_header_v1_0 *ce_hdr;
4117         const struct gfx_firmware_header_v1_0 *me_hdr;
4118         const __le32 *fw_data;
4119         unsigned i, fw_size;
4120
4121         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4122                 return -EINVAL;
4123
4124         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4125                 adev->gfx.pfp_fw->data;
4126         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4127                 adev->gfx.ce_fw->data;
4128         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4129                 adev->gfx.me_fw->data;
4130
4131         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4132         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4133         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4134
4135         gfx_v8_0_cp_gfx_enable(adev, false);
4136
4137         /* PFP */
4138         fw_data = (const __le32 *)
4139                 (adev->gfx.pfp_fw->data +
4140                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4141         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4142         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4143         for (i = 0; i < fw_size; i++)
4144                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4145         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4146
4147         /* CE */
4148         fw_data = (const __le32 *)
4149                 (adev->gfx.ce_fw->data +
4150                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4151         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4152         WREG32(mmCP_CE_UCODE_ADDR, 0);
4153         for (i = 0; i < fw_size; i++)
4154                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4155         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4156
4157         /* ME */
4158         fw_data = (const __le32 *)
4159                 (adev->gfx.me_fw->data +
4160                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4161         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4162         WREG32(mmCP_ME_RAM_WADDR, 0);
4163         for (i = 0; i < fw_size; i++)
4164                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4165         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4166
4167         return 0;
4168 }
4169
4170 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4171 {
4172         u32 count = 0;
4173         const struct cs_section_def *sect = NULL;
4174         const struct cs_extent_def *ext = NULL;
4175
4176         /* begin clear state */
4177         count += 2;
4178         /* context control state */
4179         count += 3;
4180
4181         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4182                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4183                         if (sect->id == SECT_CONTEXT)
4184                                 count += 2 + ext->reg_count;
4185                         else
4186                                 return 0;
4187                 }
4188         }
4189         /* pa_sc_raster_config/pa_sc_raster_config1 */
4190         count += 4;
4191         /* end clear state */
4192         count += 2;
4193         /* clear state */
4194         count += 2;
4195
4196         return count;
4197 }
4198
/*
 * Initialize the gfx CP and emit the clear-state preamble on the gfx
 * ring: begin-clear-state, the SECT_CONTEXT register extents from
 * vi_cs_data, per-ASIC PA_SC_RASTER_CONFIG values, end-clear-state,
 * a CLEAR_STATE packet, and finally the CE partition bases. The packet
 * count must match gfx_v8_0_get_csb_size() plus the 4-dword SET_BASE.
 *
 * Returns 0 on success or a negative error code if the ring could not
 * be locked.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;
        int r, i;

        /* init the CP */
        WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
        WREG32(mmCP_ENDIAN_SWAP, 0);
        WREG32(mmCP_DEVICE_ID, 1);

        gfx_v8_0_cp_gfx_enable(adev, true);

        r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* clear state buffer */
        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, 0x80000000);
        amdgpu_ring_write(ring, 0x80000000);

        /* emit the golden context register extents */
        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                amdgpu_ring_write(ring,
                                       PACKET3(PACKET3_SET_CONTEXT_REG,
                                               ext->reg_count));
                                amdgpu_ring_write(ring,
                                       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        amdgpu_ring_write(ring, ext->extent[i]);
                        }
                }
        }

        /* per-ASIC pa_sc_raster_config/pa_sc_raster_config1 values */
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
        switch (adev->asic_type) {
        case CHIP_TONGA:
        case CHIP_POLARIS10:
                amdgpu_ring_write(ring, 0x16000012);
                amdgpu_ring_write(ring, 0x0000002A);
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                amdgpu_ring_write(ring, 0x16000012);
                amdgpu_ring_write(ring, 0x00000000);
                break;
        case CHIP_FIJI:
                amdgpu_ring_write(ring, 0x3a00161a);
                amdgpu_ring_write(ring, 0x0000002e);
                break;
        case CHIP_CARRIZO:
                amdgpu_ring_write(ring, 0x00000002);
                amdgpu_ring_write(ring, 0x00000000);
                break;
        case CHIP_TOPAZ:
                amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
                                0x00000000 : 0x00000002);
                amdgpu_ring_write(ring, 0x00000000);
                break;
        case CHIP_STONEY:
                amdgpu_ring_write(ring, 0x00000000);
                amdgpu_ring_write(ring, 0x00000000);
                break;
        default:
                BUG();
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        amdgpu_ring_write(ring, 0);

        /* init the CE partitions */
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
        amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
        amdgpu_ring_write(ring, 0x8000);
        amdgpu_ring_write(ring, 0x8000);

        amdgpu_ring_commit(ring);

        return 0;
}
/*
 * Configure the gfx ring (CPG) doorbell: program (or disable) the RB
 * doorbell control and, on discrete parts, the doorbell range the CP
 * responds to. Iceland (Topaz) has no gfx doorbells at all.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
        u32 tmp;
        /* no gfx doorbells on iceland */
        if (adev->asic_type == CHIP_TOPAZ)
                return;

        tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

        if (ring->use_doorbell) {
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                DOORBELL_OFFSET, ring->doorbell_index);
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                                DOORBELL_HIT, 0);
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                            DOORBELL_EN, 1);
        } else {
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
        }

        WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

        /* the doorbell range registers below are only programmed on dGPUs */
        if (adev->flags & AMD_IS_APU)
                return;

        tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
                                        DOORBELL_RANGE_LOWER,
                                        AMDGPU_DOORBELL_GFX_RING0);
        WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

        WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4324
/*
 * Bring up the gfx ring buffer (RB0): program ring size/pointers and
 * writeback addresses, set up the doorbell, emit the clear-state
 * preamble via gfx_v8_0_cp_gfx_start(), then run a ring test.
 *
 * Returns 0 on success or the ring-test error code; ring->ready
 * reflects the outcome.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        u32 tmp;
        u32 rb_bufsz;
        u64 rb_addr, rptr_addr, wptr_gpu_addr;
        int r;

        /* Set the write pointer delay */
        WREG32(mmCP_RB_WPTR_DELAY, 0);

        /* set the RB to use vmid 0 */
        WREG32(mmCP_RB_VMID, 0);

        /* Set ring buffer size */
        ring = &adev->gfx.gfx_ring[0];
        rb_bufsz = order_base_2(ring->ring_size / 8);
        tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
        WREG32(mmCP_RB0_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers;
         * RB_RPTR_WR_ENA is set temporarily so the pointers can be
         * forced, then cleared again below. */
        WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
        ring->wptr = 0;
        WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

        /* set the wb address whether it's enabled or not */
        rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
        WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

        wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
        WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
        mdelay(1);
        WREG32(mmCP_RB0_CNTL, tmp);

        rb_addr = ring->gpu_addr >> 8;
        WREG32(mmCP_RB0_BASE, rb_addr);
        WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

        gfx_v8_0_set_cpg_door_bell(adev, ring);
        /* start the ring */
        amdgpu_ring_clear_ring(ring);
        gfx_v8_0_cp_gfx_start(adev);
        ring->ready = true;
        r = amdgpu_ring_test_ring(ring);
        if (r)
                ring->ready = false;

        return r;
}
4382
4383 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4384 {
4385         int i;
4386
4387         if (enable) {
4388                 WREG32(mmCP_MEC_CNTL, 0);
4389         } else {
4390                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4391                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4392                         adev->gfx.compute_ring[i].ready = false;
4393                 adev->gfx.kiq.ring.ready = false;
4394         }
4395         udelay(50);
4396 }
4397
4398 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4399 {
4400         const struct gfx_firmware_header_v1_0 *mec_hdr;
4401         const __le32 *fw_data;
4402         unsigned i, fw_size;
4403
4404         if (!adev->gfx.mec_fw)
4405                 return -EINVAL;
4406
4407         gfx_v8_0_cp_compute_enable(adev, false);
4408
4409         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4410         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4411
4412         fw_data = (const __le32 *)
4413                 (adev->gfx.mec_fw->data +
4414                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4415         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4416
4417         /* MEC1 */
4418         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4419         for (i = 0; i < fw_size; i++)
4420                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4421         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4422
4423         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4424         if (adev->gfx.mec2_fw) {
4425                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4426
4427                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4428                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4429
4430                 fw_data = (const __le32 *)
4431                         (adev->gfx.mec2_fw->data +
4432                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4433                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4434
4435                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4436                 for (i = 0; i < fw_size; i++)
4437                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4438                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4439         }
4440
4441         return 0;
4442 }
4443
/* KIQ functions */
/*
 * Tell the RLC which me/pipe/queue is the KIQ. The register is
 * deliberately written twice: first with only the queue selection,
 * then again with bit 0x80 set (NOTE(review): looks like a
 * valid/enable bit -- confirm against the register spec).
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
        uint32_t tmp;
        struct amdgpu_device *adev = ring->adev;

        /* tell RLC which is KIQ queue */
        tmp = RREG32(mmRLC_CP_SCHEDULERS);
        tmp &= 0xffffff00;
        tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
        WREG32(mmRLC_CP_SCHEDULERS, tmp);
        tmp |= 0x80;
        WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4458
/*
 * Use the KIQ to map all compute queues (KCQs): build the 64-bit queue
 * mask from the MEC queue bitmap, submit one SET_RESOURCES packet plus
 * one MAP_QUEUES packet per compute ring, and poll a scratch register
 * (written by the final SET_UCONFIG_REG) to detect completion.
 *
 * Returns 0 on success, a negative error code on scratch/ring
 * allocation failure or -EINVAL on completion timeout.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
        uint32_t scratch, tmp = 0;
        uint64_t queue_mask = 0;
        int r, i;

        for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
                if (!test_bit(i, adev->gfx.mec.queue_bitmap))
                        continue;

                /* This situation may be hit in the future if a new HW
                 * generation exposes more than 64 queues. If so, the
                 * definition of queue_mask needs updating */
                if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
                        DRM_ERROR("Invalid KCQ enabled: %d\n", i);
                        break;
                }

                queue_mask |= (1ull << i);
        }

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("Failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);

        /* 8 dwords per MAP_QUEUES + 11 dwords for SET_RESOURCES and the
         * scratch write used as a completion fence */
        r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
        if (r) {
                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }
        /* set resources */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
        amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
        amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
                uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
                uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

                /* map queues */
                amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
                /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
                amdgpu_ring_write(kiq_ring,
                                  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
                amdgpu_ring_write(kiq_ring,
                                  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
                                  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                                  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
                amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
                amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
        }
        /* write to scratch for completion */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
        amdgpu_ring_commit(kiq_ring);

        /* poll until the KIQ has processed the submission */
        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i >= adev->usec_timeout) {
                DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
                          scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);

        return r;
}
4544
/*
 * Deactivate the currently selected HQD (per-queue CP_HQD_* registers).
 * @req is the dequeue-request type written to CP_HQD_DEQUEUE_REQUEST.
 * If the HQD is active, request a dequeue and poll until it goes
 * inactive; then clear the dequeue request and the PQ read/write
 * pointers.
 *
 * Returns 0 on success or -ETIMEDOUT if the queue stays active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
        int i, r = 0;

        if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
                WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
                for (i = 0; i < adev->usec_timeout; i++) {
                        if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
                                break;
                        udelay(1);
                }
                if (i == adev->usec_timeout)
                        r = -ETIMEDOUT;
        }
        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
        WREG32(mmCP_HQD_PQ_RPTR, 0);
        WREG32(mmCP_HQD_PQ_WPTR, 0);

        return r;
}
4565
/*
 * Fill in the MQD (memory queue descriptor) for @ring in ring->mqd_ptr.
 * Only CPU-side memory is written here; the values are committed to the
 * HQD registers separately (see gfx_v8_0_mqd_commit()). Several fields
 * are seeded from the current register contents and only selected
 * bitfields are overridden.
 *
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
        uint32_t tmp;

        mqd->header = 0xC0310800;
        mqd->compute_pipelinestat_enable = 0x00000001;
        mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
        mqd->compute_misc_reserved = 0x00000003;
        /* dGPU only: point the CP at the dynamic CU mask stored in the
         * vi_mqd_allocation wrapper around this MQD */
        if (!(adev->flags & AMD_IS_APU)) {
                mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
                                             + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
                mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
                                             + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
        }
        eop_base_addr = ring->eop_gpu_addr >> 8;
        mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
        mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
        tmp = RREG32(mmCP_HQD_EOP_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
                        (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

        mqd->cp_hqd_eop_control = tmp;

        /* enable doorbell? */
        tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
                            CP_HQD_PQ_DOORBELL_CONTROL,
                            DOORBELL_EN,
                            ring->use_doorbell ? 1 : 0);

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* set the pointer to the MQD */
        mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
        mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

        /* set MQD vmid to 0 */
        tmp = RREG32(mmCP_MQD_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
        mqd->cp_mqd_control = tmp;

        /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
        hqd_gpu_addr = ring->gpu_addr >> 8;
        mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
        mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

        /* set up the HQD, this is similar to CP_RB0_CNTL */
        tmp = RREG32(mmCP_HQD_PQ_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
                            (order_base_2(ring->ring_size / 4) - 1));
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                        ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
        mqd->cp_hqd_pq_control = tmp;

        /* set the wb address whether it's enabled or not */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_rptr_report_addr_hi =
                upper_32_bits(wb_gpu_addr) & 0xffff;

        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

        tmp = 0;
        /* enable the doorbell if requested */
        if (ring->use_doorbell) {
                tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                DOORBELL_OFFSET, ring->doorbell_index);

                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_EN, 1);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_SOURCE, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_HIT, 0);
        }

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
        ring->wptr = 0;
        mqd->cp_hqd_pq_wptr = ring->wptr;
        mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

        /* set the vmid for the queue */
        mqd->cp_hqd_vmid = 0;

        tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
        mqd->cp_hqd_persistent_state = tmp;

        /* set MTYPE */
        tmp = RREG32(mmCP_HQD_IB_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ib_control = tmp;

        tmp = RREG32(mmCP_HQD_IQ_TIMER);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
        mqd->cp_hqd_iq_timer = tmp;

        tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ctx_save_control = tmp;

        /* defaults: snapshot the remaining fields from the current
         * register values */
        mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
        mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
        mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
        mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
        mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
        mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
        mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
        mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
        mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
        mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
        mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
        mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
        mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
        mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
        mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

        /* activate the queue */
        mqd->cp_hqd_active = 1;

        return 0;
}
4710
/*
 * gfx_v8_0_mqd_commit - program the HQD registers from an MQD image
 * @adev: amdgpu device
 * @mqd: CPU pointer to the MQD to commit
 *
 * Writes the MQD fields into the HQD register block for the queue that
 * is currently selected (the MQD layout mirrors the register layout
 * starting at mmCP_MQD_BASE_ADDR, so registers are indexed straight
 * into the struct). CP_HQD_ACTIVE is written last, which activates the
 * queue. Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
                        struct vi_mqd *mqd)
{
        uint32_t mqd_reg;
        uint32_t *mqd_data;

        /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
        mqd_data = &mqd->cp_mqd_base_addr_lo;

        /* disable wptr polling */
        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

        /* program all HQD registers */
        for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
         * This is safe since EOP RPTR==WPTR for any inactive HQD
         * on ASICs that do not support context-save.
         * EOP writes/reads can start anywhere in the ring.
         */
        if (adev->asic_type != CHIP_TONGA) {
                WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
                WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
                WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
        }

        for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* activate the HQD */
        for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        return 0;
}
4747
/* Initialize (or, after a GPU reset, restore) the KIQ's MQD and commit it
 * to the hardware queue.  Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* KIQ uses the backup slot just past the compute-ring slots */
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		/* re-commit the restored MQD image to the selected queue */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		/* first init: build the MQD from scratch, then commit it */
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a pristine copy for restore after GPU reset */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4786
4787 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4788 {
4789         struct amdgpu_device *adev = ring->adev;
4790         struct vi_mqd *mqd = ring->mqd_ptr;
4791         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4792
4793         if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
4794                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4795                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4796                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4797                 mutex_lock(&adev->srbm_mutex);
4798                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4799                 gfx_v8_0_mqd_init(ring);
4800                 vi_srbm_select(adev, 0, 0, 0, 0);
4801                 mutex_unlock(&adev->srbm_mutex);
4802
4803                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4804                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4805         } else if (adev->gfx.in_reset) { /* for GPU_RESET case */
4806                 /* reset MQD to a clean status */
4807                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4808                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4809                 /* reset ring buffer */
4810                 ring->wptr = 0;
4811                 amdgpu_ring_clear_ring(ring);
4812         } else {
4813                 amdgpu_ring_clear_ring(ring);
4814         }
4815         return 0;
4816 }
4817
4818 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4819 {
4820         if (adev->asic_type > CHIP_TONGA) {
4821                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4822                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4823         }
4824         /* enable doorbells */
4825         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4826 }
4827
/**
 * gfx_v8_0_kiq_resume - bring up the KIQ and all compute queues
 * @adev: amdgpu device pointer
 *
 * Enables the MEC, initializes the KIQ's MQD and then every compute ring's
 * MQD (reserving/mapping each MQD BO around its init), programs the MEC
 * doorbell range, maps the compute queues via the KIQ, and finally
 * ring-tests the KIQ and all KCQs.  A failing KCQ test only marks that
 * ring not ready; it does not fail the whole resume.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	/* map the KIQ MQD BO only while initializing it */
	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	/* same reserve/kmap dance for each compute ring's MQD */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	/* map the KCQs onto hardware queues through the KIQ */
	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
4895
/**
 * gfx_v8_0_cp_resume - load CP microcode (when needed) and start the CP rings
 * @adev: amdgpu device pointer
 *
 * Without powerplay, either loads the gfx/compute CP microcode directly
 * (legacy path) or waits for the SMU to finish loading each CP firmware
 * (CE/PFP/ME, plus MEC1 — except on Topaz, which loads compute microcode
 * directly even on the SMU path).  Then resumes the gfx ring and the
 * KIQ/compute queues.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	/* keep GUI-idle interrupts off during bring-up on dGPUs */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the firmware; just check completion */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			if (adev->asic_type == CHIP_TOPAZ) {
				/* Topaz still loads compute microcode directly */
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
										 AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
4954
/* Enable or disable both the gfx and compute command processors together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4960
/* IP-block hw_init: apply golden register settings, do base GPU setup,
 * then bring up the RLC and the command processors.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4977
/* IP-block hw_fini: release the privileged-op interrupts, then stop the CP
 * and RLC and ungate GFX powergating.  SRIOV guests bail out early and do
 * not touch the hardware.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	/* ungate GFX powergating on teardown */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
4996
4997 static int gfx_v8_0_suspend(void *handle)
4998 {
4999         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5000         adev->gfx.in_suspend = true;
5001         return gfx_v8_0_hw_fini(adev);
5002 }
5003
5004 static int gfx_v8_0_resume(void *handle)
5005 {
5006         int r;
5007         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5008
5009         r = gfx_v8_0_hw_init(adev);
5010         adev->gfx.in_suspend = false;
5011         return r;
5012 }
5013
5014 static bool gfx_v8_0_is_idle(void *handle)
5015 {
5016         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5017
5018         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5019                 return false;
5020         else
5021                 return true;
5022 }
5023
5024 static int gfx_v8_0_wait_for_idle(void *handle)
5025 {
5026         unsigned i;
5027         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5028
5029         for (i = 0; i < adev->usec_timeout; i++) {
5030                 if (gfx_v8_0_is_idle(handle))
5031                         return 0;
5032
5033                 udelay(1);
5034         }
5035         return -ETIMEDOUT;
5036 }
5037
/**
 * gfx_v8_0_check_soft_reset - decide whether GFX needs a soft reset
 * @handle: amdgpu device pointer (as void *)
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS busy bits and latches
 * the corresponding GRBM/SRBM soft-reset bits into
 * adev->gfx.{grbm,srbm}_soft_reset for the pre/soft/post reset handlers.
 *
 * Returns true when any reset bit was latched, false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		/* any busy graphics block -> reset CP, GFX and GRBM */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any busy CP sub-block (CPF/CPC/CPG) -> reset all of them plus GRBM */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* publish (or clear) the latched reset bits for the reset handlers */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5099
5100 static int gfx_v8_0_pre_soft_reset(void *handle)
5101 {
5102         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5103         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5104
5105         if ((!adev->gfx.grbm_soft_reset) &&
5106             (!adev->gfx.srbm_soft_reset))
5107                 return 0;
5108
5109         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5110         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5111
5112         /* stop the rlc */
5113         gfx_v8_0_rlc_stop(adev);
5114
5115         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5116             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5117                 /* Disable GFX parsing/prefetching */
5118                 gfx_v8_0_cp_gfx_enable(adev, false);
5119
5120         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5121             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5122             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5123             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5124                 int i;
5125
5126                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5127                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5128
5129                         mutex_lock(&adev->srbm_mutex);
5130                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5131                         gfx_v8_0_deactivate_hqd(adev, 2);
5132                         vi_srbm_select(adev, 0, 0, 0, 0);
5133                         mutex_unlock(&adev->srbm_mutex);
5134                 }
5135                 /* Disable MEC parsing/prefetching */
5136                 gfx_v8_0_cp_compute_enable(adev, false);
5137         }
5138
5139        return 0;
5140 }
5141
/**
 * gfx_v8_0_soft_reset - pulse the latched GRBM/SRBM soft-reset bits
 * @handle: amdgpu device pointer (as void *)
 *
 * Applies the reset bits computed by gfx_v8_0_check_soft_reset(): stalls
 * and clears GFX through GMCON_DEBUG, asserts then deasserts the GRBM and
 * SRBM reset bits (reading each register back to post the writes), then
 * releases the GMCON stall and lets things settle.
 *
 * Returns 0, including when no reset was pending.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall GFX and clear state before pulsing the resets */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert, hold 50us, then deassert; read back after each write */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/deassert pulse for the SRBM-side bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall/clear */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5203
/**
 * gfx_v8_0_post_soft_reset - bring the reset blocks back up
 * @handle: amdgpu device pointer (as void *)
 *
 * Mirrors gfx_v8_0_pre_soft_reset(): re-resumes the gfx CP when CP/GFX was
 * reset, deactivates any leftover compute HQD state and re-runs the
 * KIQ/compute resume when the CP sub-blocks were reset, then restarts the
 * RLC.  Returns 0, including when no reset was pending.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* make sure every compute HQD is inactive before resuming */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5241
5242 /**
5243  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5244  *
5245  * @adev: amdgpu_device pointer
5246  *
5247  * Fetches a GPU clock counter snapshot.
5248  * Returns the 64 bit clock counter snapshot.
5249  */
5250 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5251 {
5252         uint64_t clock;
5253
5254         mutex_lock(&adev->gfx.gpu_clock_mutex);
5255         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5256         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5257                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5258         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5259         return clock;
5260 }
5261
5262 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5263                                           uint32_t vmid,
5264                                           uint32_t gds_base, uint32_t gds_size,
5265                                           uint32_t gws_base, uint32_t gws_size,
5266                                           uint32_t oa_base, uint32_t oa_size)
5267 {
5268         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5269         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5270
5271         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5272         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5273
5274         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5275         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5276
5277         /* GDS Base */
5278         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5279         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5280                                 WRITE_DATA_DST_SEL(0)));
5281         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5282         amdgpu_ring_write(ring, 0);
5283         amdgpu_ring_write(ring, gds_base);
5284
5285         /* GDS Size */
5286         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5287         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5288                                 WRITE_DATA_DST_SEL(0)));
5289         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5290         amdgpu_ring_write(ring, 0);
5291         amdgpu_ring_write(ring, gds_size);
5292
5293         /* GWS */
5294         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5295         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5296                                 WRITE_DATA_DST_SEL(0)));
5297         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5298         amdgpu_ring_write(ring, 0);
5299         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5300
5301         /* OA */
5302         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5303         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5304                                 WRITE_DATA_DST_SEL(0)));
5305         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5306         amdgpu_ring_write(ring, 0);
5307         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5308 }
5309
5310 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5311 {
5312         WREG32(mmSQ_IND_INDEX,
5313                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5314                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5315                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5316                 (SQ_IND_INDEX__FORCE_READ_MASK));
5317         return RREG32(mmSQ_IND_DATA);
5318 }
5319
5320 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5321                            uint32_t wave, uint32_t thread,
5322                            uint32_t regno, uint32_t num, uint32_t *out)
5323 {
5324         WREG32(mmSQ_IND_INDEX,
5325                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5326                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5327                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5328                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5329                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5330                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5331         while (num--)
5332                 *(out++) = RREG32(mmSQ_IND_DATA);
5333 }
5334
5335 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5336 {
5337         /* type 0 wave data */
5338         dst[(*no_fields)++] = 0;
5339         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5340         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5341         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5342         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5343         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5344         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5345         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5346         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5347         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5348         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5349         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5350         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5351         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5352         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5353         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5354         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5355         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5356         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5357 }
5358
5359 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5360                                      uint32_t wave, uint32_t start,
5361                                      uint32_t size, uint32_t *dst)
5362 {
5363         wave_read_regs(
5364                 adev, simd, wave, 0,
5365                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5366 }
5367
5368
/* Miscellaneous GFX callbacks: clock counter, SE/SH selection and wave
 * debug register access.
 */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5375
/* Early IP init: set the ring counts and install the gfx, ring, irq, gds
 * and rlc callback tables.  Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5390
/* Late IP init: enable the privileged register/instruction interrupts, run
 * the EDC GPR workarounds (which need the IB pool, hence late init) and
 * gate GFX powergating.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5414
5415 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5416                                                        bool enable)
5417 {
5418         if ((adev->asic_type == CHIP_POLARIS11) ||
5419             (adev->asic_type == CHIP_POLARIS12))
5420                 /* Send msg to SMU via Powerplay */
5421                 amdgpu_set_powergating_state(adev,
5422                                              AMD_IP_BLOCK_TYPE_SMC,
5423                                              enable ?
5424                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5425
5426         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5427 }
5428
/* Toggle dynamic per-CU powergating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5434
/* Toggle quick powergating (Polaris11 path) via RLC_PG_CNTL. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5440
/* Toggle coarse-grain GFX powergating via RLC_PG_CNTL. */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5446
/* Toggle GFX pipeline powergating; when disabling, read a GFX register to
 * wake the block back up.
 */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5456
5457 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5458                                           bool enable)
5459 {
5460         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5461                 cz_enable_gfx_cg_power_gating(adev, true);
5462                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5463                         cz_enable_gfx_pipeline_power_gating(adev, true);
5464         } else {
5465                 cz_enable_gfx_cg_power_gating(adev, false);
5466                 cz_enable_gfx_pipeline_power_gating(adev, false);
5467         }
5468 }
5469
5470 static int gfx_v8_0_set_powergating_state(void *handle,
5471                                           enum amd_powergating_state state)
5472 {
5473         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5474         bool enable = (state == AMD_PG_STATE_GATE);
5475
5476         if (amdgpu_sriov_vf(adev))
5477                 return 0;
5478
5479         switch (adev->asic_type) {
5480         case CHIP_CARRIZO:
5481         case CHIP_STONEY:
5482
5483                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5484                         cz_enable_sck_slow_down_on_power_up(adev, true);
5485                         cz_enable_sck_slow_down_on_power_down(adev, true);
5486                 } else {
5487                         cz_enable_sck_slow_down_on_power_up(adev, false);
5488                         cz_enable_sck_slow_down_on_power_down(adev, false);
5489                 }
5490                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5491                         cz_enable_cp_power_gating(adev, true);
5492                 else
5493                         cz_enable_cp_power_gating(adev, false);
5494
5495                 cz_update_gfx_cg_power_gating(adev, enable);
5496
5497                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5498                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5499                 else
5500                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5501
5502                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5503                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5504                 else
5505                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5506                 break;
5507         case CHIP_POLARIS11:
5508         case CHIP_POLARIS12:
5509                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5510                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5511                 else
5512                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5513
5514                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5515                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5516                 else
5517                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5518
5519                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5520                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5521                 else
5522                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5523                 break;
5524         default:
5525                 break;
5526         }
5527
5528         return 0;
5529 }
5530
5531 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5532 {
5533         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5534         int data;
5535
5536         if (amdgpu_sriov_vf(adev))
5537                 *flags = 0;
5538
5539         /* AMD_CG_SUPPORT_GFX_MGCG */
5540         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5541         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5542                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5543
5544         /* AMD_CG_SUPPORT_GFX_CGLG */
5545         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5546         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5547                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5548
5549         /* AMD_CG_SUPPORT_GFX_CGLS */
5550         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5551                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5552
5553         /* AMD_CG_SUPPORT_GFX_CGTS */
5554         data = RREG32(mmCGTS_SM_CTRL_REG);
5555         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5556                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5557
5558         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5559         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5560                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5561
5562         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5563         data = RREG32(mmRLC_MEM_SLP_CNTL);
5564         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5565                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5566
5567         /* AMD_CG_SUPPORT_GFX_CP_LS */
5568         data = RREG32(mmCP_MEM_SLP_CNTL);
5569         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5570                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5571 }
5572
/*
 * gfx_v8_0_send_serdes_cmd - issue a BPM command over the RLC serdes bus
 * @adev: amdgpu device pointer
 * @reg_addr: target BPM register (e.g. BPM_REG_MGCG_OVERRIDE, BPM_REG_CGLS_EN)
 * @cmd: command value written into BPM_DATA (SET/CLE_BPM_SERDES_CMD)
 *
 * Broadcasts to every SE/SH and addresses all CU and non-CU masters,
 * then programs RLC_SERDES_WR_CTRL with the command and target register.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
                                     uint32_t reg_addr, uint32_t cmd)
{
        uint32_t data;

        /* broadcast: select all shader engines / shader arrays */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

        /* address every CU master and every non-CU master */
        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

        data = RREG32(mmRLC_SERDES_WR_CTRL);
        if (adev->asic_type == CHIP_STONEY)
                /* NOTE(review): unlike the generic path, Stoney does not
                 * clear BPM_DATA/REG_ADDR before OR-ing them in below —
                 * presumably intentional for this ASIC; confirm against
                 * the register spec.
                 */
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        else
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
                          RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        /* command + target register; BPM_ADDR = 0xff */
        data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
                 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
                 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
                 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

        WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5613
/* RLC safe-mode handshake messages and RLC_GPR_REG2 field layout
 * (REQ in bit 0, MESSAGE in bits 4:1).
 * NOTE(review): the RLC_GPR_REG2 definitions are not referenced by the
 * safe-mode code below, which programs mmRLC_SAFE_MODE instead — verify
 * whether they are still needed.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5620
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode before CG programming
 * @adev: amdgpu device pointer
 *
 * No-op when the RLC F32 core is not running or neither CGCG nor MGCG is
 * enabled. Otherwise writes a safe-mode request to mmRLC_SAFE_MODE and
 * polls for GFX clock/power status and for the request acknowledge.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
        u32 data;
        unsigned i;

        /* nothing to do if the RLC microengine is not running */
        data = RREG32(mmRLC_CNTL);
        if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
                return;

        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
                /* CMD = 1, MESSAGE = 1: enter safe mode.
                 * NOTE(review): the written value is seeded from the
                 * mmRLC_CNTL read above rather than mmRLC_SAFE_MODE —
                 * confirm this is intentional.
                 */
                data |= RLC_SAFE_MODE__CMD_MASK;
                data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
                data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
                WREG32(mmRLC_SAFE_MODE, data);

                /* wait until both GFX clock and power report active */
                for (i = 0; i < adev->usec_timeout; i++) {
                        if ((RREG32(mmRLC_GPM_STAT) &
                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
                            (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
                             RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
                                break;
                        udelay(1);
                }

                /* wait for the RLC to acknowledge by clearing CMD */
                for (i = 0; i < adev->usec_timeout; i++) {
                        if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
                                break;
                        udelay(1);
                }
                adev->gfx.rlc.in_safe_mode = true;
        }
}
5654
/*
 * iceland_exit_rlc_safe_mode - release RLC safe mode after CG programming
 * @adev: amdgpu device pointer
 *
 * Counterpart of iceland_enter_rlc_safe_mode(). Only sends the exit
 * request if safe mode was actually entered; no-op when the RLC F32
 * core is not running.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
        u32 data = 0;
        unsigned i;

        /* nothing to do if the RLC microengine is not running */
        data = RREG32(mmRLC_CNTL);
        if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
                return;

        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
                if (adev->gfx.rlc.in_safe_mode) {
                        /* CMD = 1, MESSAGE = 0: exit safe mode */
                        data |= RLC_SAFE_MODE__CMD_MASK;
                        data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
                        WREG32(mmRLC_SAFE_MODE, data);
                        adev->gfx.rlc.in_safe_mode = false;
                }
        }

        /* wait for the RLC to acknowledge by clearing CMD.
         * NOTE(review): this poll runs even when no exit request was
         * written above — confirm whether that is intended.
         */
        for (i = 0; i < adev->usec_timeout; i++) {
                if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
                        break;
                udelay(1);
        }
}
5679
/* RLC callbacks used to bracket CG/PG register sequences with safe mode */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
        .enter_safe_mode = iceland_enter_rlc_safe_mode,
        .exit_safe_mode = iceland_exit_rlc_safe_mode
};
5684
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 * @adev: amdgpu device pointer
 * @enable: true to enable medium-grain clock gating (when supported)
 *
 * Programs RLC/CP memory light sleep, the MGCG override bits and the
 * CGTS (tree-shade) controls, inside RLC safe mode. The numbered
 * comments reflect the required programming order — do not reorder.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, data;

        adev->gfx.rlc.funcs->enter_safe_mode(adev);

        /* It is disabled by HW by default */
        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
                                /* 1 - RLC memory Light sleep */
                                WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

                        /* 2 - CP memory Light sleep */
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
                                WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
                }

                /* 3 - RLC_CGTT_MGCG_OVERRIDE */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                if (adev->flags & AMD_IS_APU)
                        /* APUs keep the GRBM override bit set */
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
                else
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

                /* skip the write when nothing changed */
                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 5 - clear mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
                        /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
                        temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                        data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
                        data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
                        data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
                        data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
                        /* light sleep only when both MGLS and CGTS_LS are on */
                        if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
                            (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
                        data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
                        data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
                        if (temp != data)
                                WREG32(mmCGTS_SM_CTRL_REG, data);
                }
                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        } else {
                /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 2 - disable MGLS in RLC */
                data = RREG32(mmRLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                        WREG32(mmRLC_MEM_SLP_CNTL, data);
                }

                /* 3 - disable MGLS in CP */
                data = RREG32(mmCP_MEM_SLP_CNTL);
                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                        WREG32(mmCP_MEM_SLP_CNTL, data);
                }

                /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
                temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
                                CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
                if (temp != data)
                        WREG32(mmCGTS_SM_CTRL_REG, data);

                /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 6 - set mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                udelay(50);

                /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        }

        adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5788
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse-grain clock gating (when supported)
 *
 * Programs RLC_CGCG_CGLS_CTRL and the matching override bits via serdes
 * BPM commands, inside RLC safe mode. The sequence order is mandated by
 * the hardware — do not reorder.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, temp1, data, data1;

        temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

        adev->gfx.rlc.funcs->enter_safe_mode(adev);

        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
                /* 1 - clear the CGCG override bit */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 2 - clear cgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 3 - write cmd to set CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

                /* 4 - enable cgcg */
                data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
                        /* enable cgls*/
                        data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

                        /* clear the CGLS override as well */
                        temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                        data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

                        if (temp1 != data1)
                                WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
                } else {
                        data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
                }

                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);

                /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
                 * Cmp_busy/GFX_Idle interrupts
                 */
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
        } else {
                /* disable cntx_empty_int_enable & GFX Idle interrupt */
                gfx_v8_0_enable_gui_idle_interrupt(adev, false);

                /* TEST CGCG */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* read gfx register to wake up cgcg (repeated reads) */
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to Set CGCG Override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to Clear CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

                /* disable cgcg, cgls should be disabled too. */
                data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
                          RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
                /* enable interrupts again for PG */
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
        }

        gfx_v8_0_wait_for_rlc_serdes(adev);

        adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5881 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5882                                             bool enable)
5883 {
5884         if (enable) {
5885                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5886                  * ===  MGCG + MGLS + TS(CG/LS) ===
5887                  */
5888                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5889                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5890         } else {
5891                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5892                  * ===  CGCG + CGLS ===
5893                  */
5894                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5895                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5896         }
5897         return 0;
5898 }
5899
5900 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5901                                           enum amd_clockgating_state state)
5902 {
5903         uint32_t msg_id, pp_state = 0;
5904         uint32_t pp_support_state = 0;
5905         void *pp_handle = adev->powerplay.pp_handle;
5906
5907         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5908                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5909                         pp_support_state = PP_STATE_SUPPORT_LS;
5910                         pp_state = PP_STATE_LS;
5911                 }
5912                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5913                         pp_support_state |= PP_STATE_SUPPORT_CG;
5914                         pp_state |= PP_STATE_CG;
5915                 }
5916                 if (state == AMD_CG_STATE_UNGATE)
5917                         pp_state = 0;
5918
5919                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5920                                 PP_BLOCK_GFX_CG,
5921                                 pp_support_state,
5922                                 pp_state);
5923                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5924         }
5925
5926         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5927                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5928                         pp_support_state = PP_STATE_SUPPORT_LS;
5929                         pp_state = PP_STATE_LS;
5930                 }
5931
5932                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5933                         pp_support_state |= PP_STATE_SUPPORT_CG;
5934                         pp_state |= PP_STATE_CG;
5935                 }
5936
5937                 if (state == AMD_CG_STATE_UNGATE)
5938                         pp_state = 0;
5939
5940                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5941                                 PP_BLOCK_GFX_MG,
5942                                 pp_support_state,
5943                                 pp_state);
5944                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5945         }
5946
5947         return 0;
5948 }
5949
5950 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5951                                           enum amd_clockgating_state state)
5952 {
5953
5954         uint32_t msg_id, pp_state = 0;
5955         uint32_t pp_support_state = 0;
5956         void *pp_handle = adev->powerplay.pp_handle;
5957
5958         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5959                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5960                         pp_support_state = PP_STATE_SUPPORT_LS;
5961                         pp_state = PP_STATE_LS;
5962                 }
5963                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5964                         pp_support_state |= PP_STATE_SUPPORT_CG;
5965                         pp_state |= PP_STATE_CG;
5966                 }
5967                 if (state == AMD_CG_STATE_UNGATE)
5968                         pp_state = 0;
5969
5970                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5971                                 PP_BLOCK_GFX_CG,
5972                                 pp_support_state,
5973                                 pp_state);
5974                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5975         }
5976
5977         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5978                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5979                         pp_support_state = PP_STATE_SUPPORT_LS;
5980                         pp_state = PP_STATE_LS;
5981                 }
5982                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5983                         pp_support_state |= PP_STATE_SUPPORT_CG;
5984                         pp_state |= PP_STATE_CG;
5985                 }
5986                 if (state == AMD_CG_STATE_UNGATE)
5987                         pp_state = 0;
5988
5989                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5990                                 PP_BLOCK_GFX_3D,
5991                                 pp_support_state,
5992                                 pp_state);
5993                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5994         }
5995
5996         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5997                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5998                         pp_support_state = PP_STATE_SUPPORT_LS;
5999                         pp_state = PP_STATE_LS;
6000                 }
6001
6002                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6003                         pp_support_state |= PP_STATE_SUPPORT_CG;
6004                         pp_state |= PP_STATE_CG;
6005                 }
6006
6007                 if (state == AMD_CG_STATE_UNGATE)
6008                         pp_state = 0;
6009
6010                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6011                                 PP_BLOCK_GFX_MG,
6012                                 pp_support_state,
6013                                 pp_state);
6014                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6015         }
6016
6017         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6018                 pp_support_state = PP_STATE_SUPPORT_LS;
6019
6020                 if (state == AMD_CG_STATE_UNGATE)
6021                         pp_state = 0;
6022                 else
6023                         pp_state = PP_STATE_LS;
6024
6025                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6026                                 PP_BLOCK_GFX_RLC,
6027                                 pp_support_state,
6028                                 pp_state);
6029                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6030         }
6031
6032         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6033                 pp_support_state = PP_STATE_SUPPORT_LS;
6034
6035                 if (state == AMD_CG_STATE_UNGATE)
6036                         pp_state = 0;
6037                 else
6038                         pp_state = PP_STATE_LS;
6039                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6040                         PP_BLOCK_GFX_CP,
6041                         pp_support_state,
6042                         pp_state);
6043                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6044         }
6045
6046         return 0;
6047 }
6048
6049 static int gfx_v8_0_set_clockgating_state(void *handle,
6050                                           enum amd_clockgating_state state)
6051 {
6052         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6053
6054         if (amdgpu_sriov_vf(adev))
6055                 return 0;
6056
6057         switch (adev->asic_type) {
6058         case CHIP_FIJI:
6059         case CHIP_CARRIZO:
6060         case CHIP_STONEY:
6061                 gfx_v8_0_update_gfx_clock_gating(adev,
6062                                                  state == AMD_CG_STATE_GATE);
6063                 break;
6064         case CHIP_TONGA:
6065                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6066                 break;
6067         case CHIP_POLARIS10:
6068         case CHIP_POLARIS11:
6069         case CHIP_POLARIS12:
6070                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6071                 break;
6072         default:
6073                 break;
6074         }
6075         return 0;
6076 }
6077
6078 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6079 {
6080         return ring->adev->wb.wb[ring->rptr_offs];
6081 }
6082
6083 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6084 {
6085         struct amdgpu_device *adev = ring->adev;
6086
6087         if (ring->use_doorbell)
6088                 /* XXX check if swapping is necessary on BE */
6089                 return ring->adev->wb.wb[ring->wptr_offs];
6090         else
6091                 return RREG32(mmCP_RB0_WPTR);
6092 }
6093
6094 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6095 {
6096         struct amdgpu_device *adev = ring->adev;
6097
6098         if (ring->use_doorbell) {
6099                 /* XXX check if swapping is necessary on BE */
6100                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6101                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6102         } else {
6103                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6104                 (void)RREG32(mmCP_RB0_WPTR);
6105         }
6106 }
6107
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 * @ring: amdgpu ring pointer
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and waits
 * until GPU_HDP_FLUSH_DONE reports the done bit for this queue.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
        u32 ref_and_mask, reg_mem_engine;

        if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
            (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
                /* compute pipes have per-pipe done bits, starting at CP2
                 * for ME1 and CP6 for ME2 */
                switch (ring->me) {
                case 1:
                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
                        break;
                case 2:
                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
                        break;
                default:
                        /* unknown microengine: emit nothing */
                        return;
                }
                reg_mem_engine = 0;
        } else {
                ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
                reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
                                 reg_mem_engine));
        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
        amdgpu_ring_write(ring, ref_and_mask); /* reference value */
        amdgpu_ring_write(ring, ref_and_mask); /* compare mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6140
/* Emit a VGT flush: a VS partial flush event followed by a VGT flush event. */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
                EVENT_INDEX(4));

        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
                EVENT_INDEX(0));
}
6151
6152
/*
 * Emit an HDP cache invalidate: a WRITE_DATA packet that writes 1 to
 * mmHDP_DEBUG0 with write-confirm set.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0) |
                                 WR_CONFIRM));
        amdgpu_ring_write(ring, mmHDP_DEBUG0);
        amdgpu_ring_write(ring, 0);    /* register address high bits */
        amdgpu_ring_write(ring, 1);    /* value to write */

}
6164
/*
 * gfx_v8_0_ring_emit_ib_gfx - schedule an indirect buffer on the gfx ring
 * @ring: gfx ring
 * @ib: indirect buffer to execute
 * @vm_id: VM id, packed into bits 31:24 of the control word
 * @ctx_switch: unused by this function
 *
 * Constant-engine IBs use INDIRECT_BUFFER_CONST, others plain
 * INDIRECT_BUFFER. Under SR-IOV, preemptible DE IBs additionally get
 * the PRE_ENB bit and are preceded by de-meta data.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
                                      struct amdgpu_ib *ib,
                                      unsigned vm_id, bool ctx_switch)
{
        u32 header, control = 0;

        if (ib->flags & AMDGPU_IB_FLAG_CE)
                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        else
                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

        control |= ib->length_dw | (vm_id << 24);

        if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
                control |= INDIRECT_BUFFER_PRE_ENB(1);

                if (!(ib->flags & AMDGPU_IB_FLAG_CE))
                        gfx_v8_0_ring_emit_de_meta(ring);
        }

        amdgpu_ring_write(ring, header);
        amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) |   /* byte-swap the IB on BE hosts */
#endif
                          (ib->gpu_addr & 0xFFFFFFFC)); /* 4-byte aligned */
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
}
6194
/*
 * gfx_v8_0_ring_emit_ib_compute - schedule an indirect buffer on a
 * compute ring
 * @ring: compute ring
 * @ib: indirect buffer to execute
 * @vm_id: VM id, packed into bits 31:24 of the control word
 * @ctx_switch: unused by this function
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
                                          struct amdgpu_ib *ib,
                                          unsigned vm_id, bool ctx_switch)
{
        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
                                (2 << 0) |   /* byte-swap the IB on BE hosts */
#endif
                                (ib->gpu_addr & 0xFFFFFFFC)); /* 4-byte aligned */
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
}
6210
/* Emit a fence on the GFX ring: EVENT_WRITE_EOP flushes the TC/TCL1
 * caches, writes @seq (32 or 64 bit depending on @flags) to @addr and
 * optionally raises an interrupt when AMDGPU_FENCE_FLAG_INT is set.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* EVENT_WRITE_EOP - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, addr & 0xfffffffc);     /* dst must be dword aligned */
        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
                          DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));

}
6231
/* Stall the ring's front end until the fence memory for this ring
 * reaches the latest emitted sequence number.  Gfx rings wait on the
 * PFP, compute rings on the ME (compute has no PFP).
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;

        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
                                 WAIT_REG_MEM_FUNCTION(3) | /* equal */
                                 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
        amdgpu_ring_write(ring, seq);           /* reference value */
        amdgpu_ring_write(ring, 0xffffffff);    /* compare mask: all bits */
        amdgpu_ring_write(ring, 4); /* poll interval */
}
6248
/* Emit a TLB flush for @vm_id: program the new page-directory base into
 * the per-VMID page table base register, request an invalidate for that
 * VMID, wait for it, and on gfx rings resync the PFP with the ME so the
 * PFP does not prefetch through stale translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                        unsigned vm_id, uint64_t pd_addr)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
                                 WRITE_DATA_DST_SEL(0)) |
                                 WR_CONFIRM);
        /* VMIDs 0-7 and 8-15 live in two separate register banks */
        if (vm_id < 8) {
                amdgpu_ring_write(ring,
                                  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
        } else {
                amdgpu_ring_write(ring,
                                  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
        }
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, pd_addr >> 12); /* register takes a 4K-page frame number */

        /* bits 0-15 are the VM contexts0-15 */
        /* invalidate the cache */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 1 << vm_id);

        /* wait for the invalidate to complete */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
                                 WAIT_REG_MEM_ENGINE(0))); /* me */
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0); /* ref */
        amdgpu_ring_write(ring, 0); /* mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */

        /* compute doesn't have PFP */
        if (usepfp) {
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
        }
}
6295
6296 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6297 {
6298         return ring->adev->wb.wb[ring->wptr_offs];
6299 }
6300
/* Publish a compute ring's write pointer: mirror it into the writeback
 * slot, then ring the doorbell to notify the CP.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        /* XXX check if swapping is necessary on BE */
        adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
        WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
6309
/* Emit a fence on a compute ring using the RELEASE_MEM packet: flush
 * TC/TCL1 caches, write @seq to @addr and optionally raise an interrupt.
 * Note the packet layout differs from EVENT_WRITE_EOP: the DATA/INT
 * selects come before the address here.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                             u64 addr, u64 seq,
                                             unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, addr & 0xfffffffc);     /* dst must be dword aligned */
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));
}
6330
/* Emit a fence on the KIQ ring: plain WRITE_DATA of the 32-bit sequence
 * number to memory, optionally followed by a write to CPC_INT_STATUS to
 * trigger the GENERIC2 interrupt.  64-bit fences are not supported
 * because only 32 bits of writeback space are allocated per seq.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned int flags)
{
        /* we only allocate 32bit for each seq wb address */
        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        /* write fence seq to the "addr" */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));  /* dst: memory */
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));

        if (flags & AMDGPU_FENCE_FLAG_INT) {
                /* set register to trigger INT */
                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
                amdgpu_ring_write(ring, 0);
                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
        }
}
6355
/* Emit a single SWITCH_BUFFER packet on the gfx ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
        amdgpu_ring_write(ring, 0);
}
6361
/* Emit a CONTEXT_CONTROL packet.  @flags (AMDGPU_HAVE_CTX_SWITCH,
 * AMDGPU_PREAMBLE_IB_PRESENT*) selects which state groups the CP should
 * load; dw2 accumulates the load bits.  Under SR-IOV the CE metadata is
 * emitted first.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
        uint32_t dw2 = 0;

        if (amdgpu_sriov_vf(ring->adev))
                gfx_v8_0_ring_emit_ce_meta(ring);

        dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
                gfx_v8_0_ring_emit_vgt_flush(ring);
                /* set load_global_config & load_global_uconfig */
                dw2 |= 0x8001;
                /* set load_cs_sh_regs */
                dw2 |= 0x01000000;
                /* set load_per_context_state & load_gfx_sh_regs for GFX */
                dw2 |= 0x10002;

                /* set load_ce_ram if preamble presented */
                if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
                        dw2 |= 0x10000000;
        } else {
                /* still load_ce_ram if this is the first time preamble presented
                 * although there is no context switch happens.
                 */
                if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
                        dw2 |= 0x10000000;
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, dw2);
        amdgpu_ring_write(ring, 0);
}
6394
/* Open a conditional-execution region: emit COND_EXEC referencing
 * cond_exe_gpu_addr with a placeholder (0x55aa55aa) for the number of
 * dwords to skip.  Returns the ring offset of the placeholder so
 * gfx_v8_0_ring_emit_patch_cond_exec() can fill it in later.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
        unsigned ret;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
        ret = ring->wptr & ring->buf_mask;
        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
        return ret;
}
6407
/* Patch the dword count of a COND_EXEC emitted earlier by
 * gfx_v8_0_ring_emit_init_cond_exec().  @offset is the ring offset of
 * the 0x55aa55aa placeholder; the count written is the distance from
 * the placeholder to the current wptr, with the else branch handling
 * the case where wptr has wrapped around the ring buffer.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
        unsigned cur;

        BUG_ON(offset > ring->buf_mask);
        BUG_ON(ring->ring[offset] != 0x55aa55aa);

        cur = (ring->wptr & ring->buf_mask) - 1;
        if (likely(cur > offset))
                ring->ring[offset] = cur - offset;
        else
                /* wptr wrapped past the end of the ring since init */
                ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6421
/* Emit a register read: COPY_DATA copies @reg into the VF's
 * reg_val_offs writeback slot so the host side can read the value from
 * memory.  Used on the KIQ ring (see gfx_v8_0_ring_funcs_kiq).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
        struct amdgpu_device *adev = ring->adev;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
                                (5 << 8) |      /* dst: memory */
                                (1 << 20));     /* write confirm */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
}
6437
/* Emit a register write: WRITE_DATA of @val to register @reg.
 * Used on the KIQ ring (see gfx_v8_0_ring_funcs_kiq).
 */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
                                  uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}
6447
6448 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6449                                                  enum amdgpu_interrupt_state state)
6450 {
6451         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6452                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6453 }
6454
6455 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6456                                                      int me, int pipe,
6457                                                      enum amdgpu_interrupt_state state)
6458 {
6459         u32 mec_int_cntl, mec_int_cntl_reg;
6460
6461         /*
6462          * amdgpu controls only the first MEC. That's why this function only
6463          * handles the setting of interrupts for this specific MEC. All other
6464          * pipes' interrupts are set by amdkfd.
6465          */
6466
6467         if (me == 1) {
6468                 switch (pipe) {
6469                 case 0:
6470                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6471                         break;
6472                 case 1:
6473                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6474                         break;
6475                 case 2:
6476                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6477                         break;
6478                 case 3:
6479                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6480                         break;
6481                 default:
6482                         DRM_DEBUG("invalid pipe %d\n", pipe);
6483                         return;
6484                 }
6485         } else {
6486                 DRM_DEBUG("invalid me %d\n", me);
6487                 return;
6488         }
6489
6490         switch (state) {
6491         case AMDGPU_IRQ_STATE_DISABLE:
6492                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6493                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6494                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6495                 break;
6496         case AMDGPU_IRQ_STATE_ENABLE:
6497                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6498                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6499                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6500                 break;
6501         default:
6502                 break;
6503         }
6504 }
6505
6506 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6507                                              struct amdgpu_irq_src *source,
6508                                              unsigned type,
6509                                              enum amdgpu_interrupt_state state)
6510 {
6511         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6512                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6513
6514         return 0;
6515 }
6516
6517 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6518                                               struct amdgpu_irq_src *source,
6519                                               unsigned type,
6520                                               enum amdgpu_interrupt_state state)
6521 {
6522         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6523                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6524
6525         return 0;
6526 }
6527
6528 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6529                                             struct amdgpu_irq_src *src,
6530                                             unsigned type,
6531                                             enum amdgpu_interrupt_state state)
6532 {
6533         switch (type) {
6534         case AMDGPU_CP_IRQ_GFX_EOP:
6535                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6536                 break;
6537         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6538                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6539                 break;
6540         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6541                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6542                 break;
6543         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6544                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6545                 break;
6546         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6547                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6548                 break;
6549         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6550                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6551                 break;
6552         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6553                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6554                 break;
6555         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6556                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6557                 break;
6558         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6559                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6560                 break;
6561         default:
6562                 break;
6563         }
6564         return 0;
6565 }
6566
/* End-of-pipe interrupt handler: decode me/pipe/queue from the IH
 * ring_id and run fence processing on the ring that generated it.
 * me_id 0 is the gfx ring, 1 and 2 are the compute MECs.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
                            struct amdgpu_irq_src *source,
                            struct amdgpu_iv_entry *entry)
{
        int i;
        u8 me_id, pipe_id, queue_id;
        struct amdgpu_ring *ring;

        DRM_DEBUG("IH: CP EOP\n");
        me_id = (entry->ring_id & 0x0c) >> 2;
        pipe_id = (entry->ring_id & 0x03) >> 0;
        queue_id = (entry->ring_id & 0x70) >> 4;

        switch (me_id) {
        case 0:
                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
                break;
        case 1:
        case 2:
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        ring = &adev->gfx.compute_ring[i];
                        /* Per-queue interrupt is supported for MEC starting from VI.
                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
                          */
                        if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
                                amdgpu_fence_process(ring);
                }
                break;
        }
        return 0;
}
6598
/* Privileged register access fault: log it and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6607
/* Privileged instruction fault: log it and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6616
/* Enable or disable the KIQ GENERIC2 interrupt, both globally in
 * CPC_INT_CNTL and in the per-pipe INT_CNTL register of whichever MEC
 * pipe the KIQ ring lives on.  Only GENERIC2 is supported.
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
                                            struct amdgpu_irq_src *src,
                                            unsigned int type,
                                            enum amdgpu_interrupt_state state)
{
        struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

        switch (type) {
        case AMDGPU_CP_KIQ_IRQ_DRIVER0:
                WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
                             state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
                /* per-pipe register selected by offset from the PIPE0 base */
                if (ring->me == 1)
                        WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
                                     ring->pipe,
                                     GENERIC2_INT_ENABLE,
                                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
                else
                        WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
                                     ring->pipe,
                                     GENERIC2_INT_ENABLE,
                                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
                break;
        default:
                BUG(); /* kiq only support GENERIC2_INT now */
                break;
        }
        return 0;
}
6645
/* KIQ GENERIC2 interrupt handler: decode the source for debugging and
 * run fence processing on the KIQ ring.
 */
static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
                            struct amdgpu_irq_src *source,
                            struct amdgpu_iv_entry *entry)
{
        u8 me_id, pipe_id, queue_id;
        struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

        me_id = (entry->ring_id & 0x0c) >> 2;
        pipe_id = (entry->ring_id & 0x03) >> 0;
        queue_id = (entry->ring_id & 0x70) >> 4;
        DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
                   me_id, pipe_id, queue_id);

        amdgpu_fence_process(ring);
        return 0;
}
6662
/* IP-block level callbacks (init/teardown, power/clock gating, reset)
 * for the GFX8 block, referenced by the gfx_v8_{0,1}_ip_block versions
 * at the bottom of this file.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
        .late_init = gfx_v8_0_late_init,
        .sw_init = gfx_v8_0_sw_init,
        .sw_fini = gfx_v8_0_sw_fini,
        .hw_init = gfx_v8_0_hw_init,
        .hw_fini = gfx_v8_0_hw_fini,
        .suspend = gfx_v8_0_suspend,
        .resume = gfx_v8_0_resume,
        .is_idle = gfx_v8_0_is_idle,
        .wait_for_idle = gfx_v8_0_wait_for_idle,
        .check_soft_reset = gfx_v8_0_check_soft_reset,
        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
        .soft_reset = gfx_v8_0_soft_reset,
        .post_soft_reset = gfx_v8_0_post_soft_reset,
        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
        .set_powergating_state = gfx_v8_0_set_powergating_state,
        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6683
/* Ring callbacks for the graphics ring (PFP/ME path). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
        .type = AMDGPU_RING_TYPE_GFX,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
        .emit_frame_size = /* maximum 215dw if count 16 IBs in */
                5 +  /* COND_EXEC */
                7 +  /* PIPELINE_SYNC */
                19 + /* VM_FLUSH */
                8 +  /* FENCE for VM_FLUSH */
                20 + /* GDS switch */
                4 + /* double SWITCH_BUFFER,
                       the first COND_EXEC jump to the place just
                           prior to this double SWITCH_BUFFER  */
                5 + /* COND_EXEC */
                7 +      /*     HDP_flush */
                4 +      /*     VGT_flush */
                14 + /* CE_META */
                31 + /* DE_META */
                3 + /* CNTX_CTRL */
                5 + /* HDP_INVL */
                8 + 8 + /* FENCE x2 */
                2, /* SWITCH_BUFFER */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_switch_buffer = gfx_v8_ring_emit_sb,
        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
6727
/* Ring callbacks for the compute (MEC) rings.  Note the RELEASE_MEM
 * based fence and the absence of the gfx-only hooks (cond-exec,
 * context control, switch buffer).
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
        .type = AMDGPU_RING_TYPE_COMPUTE,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
};
6756
/* Ring callbacks for the KIQ (kernel interface queue).  The KIQ has its
 * own fence path and is the only ring exposing emit_rreg/emit_wreg for
 * register access via the CP.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v8_0_ring_emit_rreg,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6782
6783 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6784 {
6785         int i;
6786
6787         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6788
6789         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6790                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6791
6792         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6793                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6794 }
6795
/* IRQ source callbacks: end-of-pipe interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
        .set = gfx_v8_0_set_eop_interrupt_state,
        .process = gfx_v8_0_eop_irq,
};
6800
/* IRQ source callbacks: privileged register access faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
        .set = gfx_v8_0_set_priv_reg_fault_state,
        .process = gfx_v8_0_priv_reg_irq,
};
6805
/* IRQ source callbacks: privileged instruction faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
        .set = gfx_v8_0_set_priv_inst_fault_state,
        .process = gfx_v8_0_priv_inst_irq,
};
6810
/* IRQ source callbacks: KIQ GENERIC2 interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
        .set = gfx_v8_0_kiq_set_interrupt_state,
        .process = gfx_v8_0_kiq_irq,
};
6815
/* Register the GFX8 interrupt sources (EOP, priv-reg/priv-inst faults,
 * KIQ) with the amdgpu IRQ layer.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

        adev->gfx.priv_reg_irq.num_types = 1;
        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

        adev->gfx.priv_inst_irq.num_types = 1;
        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

        adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
        adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
6830
/* Install the RLC callbacks; all GFX8 parts share the Iceland table. */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
6835
/* Initialize the GDS (global data share) partition sizes from the
 * hardware-reported total; 64KB parts get a different gfx/CS split
 * than larger ones.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
        /* init asic gds info */
        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
        adev->gds.gws.total_size = 64;
        adev->gds.oa.total_size = 16;

        if (adev->gds.mem.total_size == 64 * 1024) {
                adev->gds.mem.gfx_partition_size = 4096;
                adev->gds.mem.cs_partition_size = 4096;

                adev->gds.gws.gfx_partition_size = 4;
                adev->gds.gws.cs_partition_size = 4;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 1;
        } else {
                adev->gds.mem.gfx_partition_size = 1024;
                adev->gds.mem.cs_partition_size = 1024;

                adev->gds.gws.gfx_partition_size = 16;
                adev->gds.gws.cs_partition_size = 16;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 4;
        }
}
6863
6864 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6865                                                  u32 bitmap)
6866 {
6867         u32 data;
6868
6869         if (!bitmap)
6870                 return;
6871
6872         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6873         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6874
6875         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6876 }
6877
6878 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6879 {
6880         u32 data, mask;
6881
6882         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6883                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6884
6885         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6886
6887         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6888 }
6889
/* Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, and the "always on" CU mask.  Walks every shader
 * engine/array under grbm_idx_mutex, applying any user CU disable
 * masks first.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
        int i, j, k, counter, active_cu_number = 0;
        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
        unsigned disable_masks[4 * 2];
        u32 ao_cu_num;

        memset(cu_info, 0, sizeof(*cu_info));

        /* APUs cap the always-on CU count at 2 */
        if (adev->flags & AMD_IS_APU)
                ao_cu_num = 2;
        else
                ao_cu_num = adev->gfx.config.max_cu_per_sh;

        amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
                        /* select this SE/SH so the per-array registers are visible */
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        if (i < 4 && j < 2)
                                gfx_v8_0_set_user_cu_inactive_bitmap(
                                        adev, disable_masks[i * 2 + j]);
                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
                        cu_info->bitmap[i][j] = bitmap;

                        /* count active CUs; the first ao_cu_num of them
                         * become the always-on set for this array */
                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
                                if (bitmap & mask) {
                                        if (counter < ao_cu_num)
                                                ao_bitmap |= mask;
                                        counter ++;
                                }
                                mask <<= 1;
                        }
                        active_cu_number += counter;
                        if (i < 2 && j < 2)
                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
                        cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
                }
        }
        /* restore broadcast mode */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
}
6940
/* IP block descriptor for GFX 8.0 hardware (shares funcs with 8.1). */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
6949
/* IP block descriptor for GFX 8.1 hardware; reuses the 8.0 funcs table. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
6958
6959 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
6960 {
6961         uint64_t ce_payload_addr;
6962         int cnt_ce;
6963         static union {
6964                 struct vi_ce_ib_state regular;
6965                 struct vi_ce_ib_state_chained_ib chained;
6966         } ce_payload = {};
6967
6968         if (ring->adev->virt.chained_ib_support) {
6969                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
6970                                                   offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
6971                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
6972         } else {
6973                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
6974                                                   offsetof(struct vi_gfx_meta_data, ce_payload);
6975                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
6976         }
6977
6978         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
6979         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
6980                                 WRITE_DATA_DST_SEL(8) |
6981                                 WR_CONFIRM) |
6982                                 WRITE_DATA_CACHE_POLICY(0));
6983         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
6984         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
6985         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
6986 }
6987
6988 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
6989 {
6990         uint64_t de_payload_addr, gds_addr, csa_addr;
6991         int cnt_de;
6992         static union {
6993                 struct vi_de_ib_state regular;
6994                 struct vi_de_ib_state_chained_ib chained;
6995         } de_payload = {};
6996
6997         csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
6998         gds_addr = csa_addr + 4096;
6999         if (ring->adev->virt.chained_ib_support) {
7000                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7001                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7002                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7003                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7004         } else {
7005                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7006                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7007                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7008                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7009         }
7010
7011         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7012         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7013                                 WRITE_DATA_DST_SEL(8) |
7014                                 WR_CONFIRM) |
7015                                 WRITE_DATA_CACHE_POLICY(0));
7016         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7017         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7018         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7019 }
This page took 0.473556 seconds and 4 git commands to generate.