/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
54 #define GFX8_NUM_GFX_RINGS     1
55 #define GFX8_NUM_COMPUTE_RINGS 8
56
57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
61
62 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
63 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
64 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
65 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
66 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
67 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
68 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
69 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
70 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
71
72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
77 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
78
79 /* BPM SERDES CMD */
80 #define SET_BPM_SERDES_CMD    1
81 #define CLE_BPM_SERDES_CMD    0
82
83 /* BPM Register Address*/
/*
 * Indices into the BPM (per-CU power management) register space, used with
 * the SET/CLE_BPM_SERDES_CMD commands above when driving the BPM serdes.
 */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX            /* number of BPM registers */
};
92
93 #define RLC_FormatDirectRegListLength        14
94
95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
101
102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
107
108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
114
115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
120
121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
127
128 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
133 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
134
135 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
141
142 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
143 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
144 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
145 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
146 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
147 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
148
/*
 * Per-VMID GDS register offsets, indexed by VMID (0-15).
 * Each entry holds the {BASE, SIZE, GWS, OA} register offsets for one VMID
 * (field order follows struct amdgpu_gds_reg_offset, declared elsewhere).
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
168
/*
 * Golden register fix-ups for Tonga A11.
 * Entries are {register offset, field mask, value} triplets consumed by
 * amdgpu_program_register_sequence(); a mask of 0xffffffff replaces the
 * whole register, otherwise only the masked bits are updated.
 * Values are hardware-validated magic — do not alter.
 */
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
188
/*
 * Common golden settings applied to all Tonga variants
 * ({offset, mask, value} triplets; see amdgpu_program_register_sequence()).
 */
static const u32 tonga_golden_common_all[] =
{
        /* NOTE(review): 0xe0000000 appears to select broadcast to all SE/SH
         * instances before the per-SE writes below — confirm against the
         * GRBM_GFX_INDEX field definitions. */
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
200
/*
 * Tonga medium-grain / coarse-grain clock-gating (MGCG/CGCG) init sequence.
 * {offset, mask, value} triplets; ordering matters: the RLC override and
 * GRBM_GFX_INDEX writes precede the per-block CGTT_*_CLK_CTRL setup, and the
 * per-CU CGTS_CU* writes follow the second GRBM_GFX_INDEX write.
 * Values are hardware-validated magic — do not alter.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* per-CU gating setup for CU0..CU7 (CU0/CU4 use the TA_SQC variant) */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
279
/*
 * Golden register fix-ups for Polaris11 A11
 * ({offset, mask, value} triplets; see amdgpu_program_register_sequence()).
 * Values are hardware-validated magic — do not alter.
 */
static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
300
/*
 * Common golden settings applied to all Polaris11 variants
 * ({offset, mask, value} triplets; see amdgpu_program_register_sequence()).
 */
static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
310
/*
 * Golden register fix-ups for Polaris10 A11
 * ({offset, mask, value} triplets; see amdgpu_program_register_sequence()).
 * Values are hardware-validated magic — do not alter.
 */
static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
331
/*
 * Common golden settings applied to all Polaris10 variants
 * ({offset, mask, value} triplets; see amdgpu_program_register_sequence()).
 */
static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
343
/*
 * Common golden settings applied to all Fiji variants
 * ({offset, mask, value} triplets; see amdgpu_program_register_sequence()).
 */
static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
357
/*
 * Golden register fix-ups for Fiji A10
 * ({offset, mask, value} triplets; see amdgpu_program_register_sequence()).
 * Values are hardware-validated magic — do not alter.
 */
static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
372
/*
 * Fiji MGCG/CGCG clock-gating init sequence.
 * {offset, mask, value} triplets; order matters (RLC override and
 * GRBM_GFX_INDEX writes bracket the per-block CGTT_*_CLK_CTRL setup).
 * Unlike the Tonga table, no per-CU CGTS_CU* entries are programmed here.
 * Values are hardware-validated magic — do not alter.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
411
/*
 * Golden register fix-ups for Iceland (Topaz) A11
 * ({offset, mask, value} triplets; see amdgpu_program_register_sequence()).
 * Values are hardware-validated magic — do not alter.
 */
static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
431
/*
 * Common golden settings applied to all Iceland (Topaz) variants
 * ({offset, mask, value} triplets; see amdgpu_program_register_sequence()).
 */
static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
443
/*
 * Iceland (Topaz) MGCG/CGCG clock-gating init sequence.
 * {offset, mask, value} triplets; order matters.  Per-CU CGTS_CU* entries
 * cover CU0..CU5 only (smaller part than Tonga/Carrizo); CU0 and CU4 use
 * the TA_SQC variant with a different value (0x0f840f87).
 * Values are hardware-validated magic — do not alter.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
511
/*
 * Golden register fix-ups for Carrizo A11
 * ({offset, mask, value} triplets; see amdgpu_program_register_sequence()).
 * Values are hardware-validated magic — do not alter.
 */
static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
527
/*
 * Common golden settings applied to all Carrizo variants
 * ({offset, mask, value} triplets; see amdgpu_program_register_sequence()).
 */
static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
539
/*
 * Carrizo MGCG/CGCG clock-gating init sequence.
 * {offset, mask, value} triplets; order matters: the RLC override and
 * GRBM_GFX_INDEX writes precede the per-block CGTT_*_CLK_CTRL setup, and
 * per-CU CGTS_CU* writes (CU0..CU7; CU0/CU4 use the TA_SQC variant)
 * follow the second GRBM_GFX_INDEX write.
 * Values are hardware-validated magic — do not alter.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
618
/*
 * Golden register fix-ups for Stoney A11
 * ({offset, mask, value} triplets; see amdgpu_program_register_sequence()).
 * Values are hardware-validated magic — do not alter.
 */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
632
/* Stoney common raster/resource-reservation setup, applied after the
 * per-revision tables.  Same triple layout as the other golden tables. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
644
/* Stoney medium-grain/coarse-grain clockgating init values (RLC, CP and
 * CGTS control registers).  Same triple layout as the other golden tables. */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
653
654 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
655 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
656 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
657 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
658 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
659 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
660 static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
661 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
662 static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
663 static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);
664
/*
 * gfx_v8_0_init_golden_registers - program the per-ASIC "golden" register
 * settings (clockgating init, tuning and common setup tables) via
 * amdgpu_program_register_sequence().  Table order within each case is
 * significant; ASICs without tables fall through to the empty default.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* Polaris11 and Polaris12 share one settings table and have
		 * no separate mgcg/cgcg init table here. */
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific workaround: on certain Polaris10 boards
		 * (matched by PCI revision and subsystem IDs below) two
		 * extra I2C transactions are issued -- presumably to an
		 * external voltage controller; confirm against the board
		 * design before touching these magic values. */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
753
754 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
755 {
756         adev->gfx.scratch.num_reg = 7;
757         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
758         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
759 }
760
761 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
762 {
763         struct amdgpu_device *adev = ring->adev;
764         uint32_t scratch;
765         uint32_t tmp = 0;
766         unsigned i;
767         int r;
768
769         r = amdgpu_gfx_scratch_get(adev, &scratch);
770         if (r) {
771                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
772                 return r;
773         }
774         WREG32(scratch, 0xCAFEDEAD);
775         r = amdgpu_ring_alloc(ring, 3);
776         if (r) {
777                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
778                           ring->idx, r);
779                 amdgpu_gfx_scratch_free(adev, scratch);
780                 return r;
781         }
782         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
783         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
784         amdgpu_ring_write(ring, 0xDEADBEEF);
785         amdgpu_ring_commit(ring);
786
787         for (i = 0; i < adev->usec_timeout; i++) {
788                 tmp = RREG32(scratch);
789                 if (tmp == 0xDEADBEEF)
790                         break;
791                 DRM_UDELAY(1);
792         }
793         if (i < adev->usec_timeout) {
794                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
795                          ring->idx, i);
796         } else {
797                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
798                           ring->idx, scratch, tmp);
799                 r = -EINVAL;
800         }
801         amdgpu_gfx_scratch_free(adev, scratch);
802         return r;
803 }
804
/*
 * gfx_v8_0_ring_test_ib - smoke-test indirect-buffer execution on a ring.
 *
 * Seeds a scratch register with 0xCAFEDEAD, schedules a tiny IB that
 * writes 0xDEADBEEF to it, waits on the returned fence (with @timeout,
 * in jiffies as passed by the caller), and verifies the write landed.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signalled,
 * -EINVAL if the scratch value is wrong, or a negative error from
 * scratch/IB allocation, scheduling or the fence wait.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* sentinel value so a stale 0xDEADBEEF can't fake a pass */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* three-dword IB: write 0xDEADBEEF to the scratch register */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	/* 0 == timed out, <0 == wait error, >0 == signalled in time */
	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	/* goto-based cleanup: free only what was acquired */
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
860
861
862 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
863         release_firmware(adev->gfx.pfp_fw);
864         adev->gfx.pfp_fw = NULL;
865         release_firmware(adev->gfx.me_fw);
866         adev->gfx.me_fw = NULL;
867         release_firmware(adev->gfx.ce_fw);
868         adev->gfx.ce_fw = NULL;
869         release_firmware(adev->gfx.rlc_fw);
870         adev->gfx.rlc_fw = NULL;
871         release_firmware(adev->gfx.mec_fw);
872         adev->gfx.mec_fw = NULL;
873         if ((adev->asic_type != CHIP_STONEY) &&
874             (adev->asic_type != CHIP_TOPAZ))
875                 release_firmware(adev->gfx.mec2_fw);
876         adev->gfx.mec2_fw = NULL;
877
878         kfree(adev->gfx.rlc.register_list_format);
879 }
880
881 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
882 {
883         const char *chip_name;
884         char fw_name[30];
885         int err;
886         struct amdgpu_firmware_info *info = NULL;
887         const struct common_firmware_header *header = NULL;
888         const struct gfx_firmware_header_v1_0 *cp_hdr;
889         const struct rlc_firmware_header_v2_0 *rlc_hdr;
890         unsigned int *tmp = NULL, i;
891
892         DRM_DEBUG("\n");
893
894         switch (adev->asic_type) {
895         case CHIP_TOPAZ:
896                 chip_name = "topaz";
897                 break;
898         case CHIP_TONGA:
899                 chip_name = "tonga";
900                 break;
901         case CHIP_CARRIZO:
902                 chip_name = "carrizo";
903                 break;
904         case CHIP_FIJI:
905                 chip_name = "fiji";
906                 break;
907         case CHIP_POLARIS11:
908                 chip_name = "polaris11";
909                 break;
910         case CHIP_POLARIS10:
911                 chip_name = "polaris10";
912                 break;
913         case CHIP_POLARIS12:
914                 chip_name = "polaris12";
915                 break;
916         case CHIP_STONEY:
917                 chip_name = "stoney";
918                 break;
919         default:
920                 BUG();
921         }
922
923         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
924         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
925         if (err)
926                 goto out;
927         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
928         if (err)
929                 goto out;
930         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
931         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
932         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
933
934         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
935         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
936         if (err)
937                 goto out;
938         err = amdgpu_ucode_validate(adev->gfx.me_fw);
939         if (err)
940                 goto out;
941         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
942         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
943
944         /* chain ib ucode isn't formal released, just disable it by far
945          * TODO: when ucod ready we should use ucode version to judge if
946          * chain-ib support or not.
947          */
948         adev->virt.chained_ib_support = false;
949
950         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
951
952         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
953         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
954         if (err)
955                 goto out;
956         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
957         if (err)
958                 goto out;
959         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
960         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
961         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
962
963         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
964         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
965         if (err)
966                 goto out;
967         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
968         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
969         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
970         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
971
972         adev->gfx.rlc.save_and_restore_offset =
973                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
974         adev->gfx.rlc.clear_state_descriptor_offset =
975                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
976         adev->gfx.rlc.avail_scratch_ram_locations =
977                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
978         adev->gfx.rlc.reg_restore_list_size =
979                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
980         adev->gfx.rlc.reg_list_format_start =
981                         le32_to_cpu(rlc_hdr->reg_list_format_start);
982         adev->gfx.rlc.reg_list_format_separate_start =
983                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
984         adev->gfx.rlc.starting_offsets_start =
985                         le32_to_cpu(rlc_hdr->starting_offsets_start);
986         adev->gfx.rlc.reg_list_format_size_bytes =
987                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
988         adev->gfx.rlc.reg_list_size_bytes =
989                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
990
991         adev->gfx.rlc.register_list_format =
992                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
993                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
994
995         if (!adev->gfx.rlc.register_list_format) {
996                 err = -ENOMEM;
997                 goto out;
998         }
999
1000         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1001                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1002         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1003                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1004
1005         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1006
1007         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1008                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1009         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1010                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1011
1012         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1013         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1014         if (err)
1015                 goto out;
1016         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1017         if (err)
1018                 goto out;
1019         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1020         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1021         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1022
1023         if ((adev->asic_type != CHIP_STONEY) &&
1024             (adev->asic_type != CHIP_TOPAZ)) {
1025                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1026                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1027                 if (!err) {
1028                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1029                         if (err)
1030                                 goto out;
1031                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1032                                 adev->gfx.mec2_fw->data;
1033                         adev->gfx.mec2_fw_version =
1034                                 le32_to_cpu(cp_hdr->header.ucode_version);
1035                         adev->gfx.mec2_feature_version =
1036                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1037                 } else {
1038                         err = 0;
1039                         adev->gfx.mec2_fw = NULL;
1040                 }
1041         }
1042
1043         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1044                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1045                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1046                 info->fw = adev->gfx.pfp_fw;
1047                 header = (const struct common_firmware_header *)info->fw->data;
1048                 adev->firmware.fw_size +=
1049                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1050
1051                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1052                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1053                 info->fw = adev->gfx.me_fw;
1054                 header = (const struct common_firmware_header *)info->fw->data;
1055                 adev->firmware.fw_size +=
1056                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1057
1058                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1059                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1060                 info->fw = adev->gfx.ce_fw;
1061                 header = (const struct common_firmware_header *)info->fw->data;
1062                 adev->firmware.fw_size +=
1063                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1064
1065                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1066                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1067                 info->fw = adev->gfx.rlc_fw;
1068                 header = (const struct common_firmware_header *)info->fw->data;
1069                 adev->firmware.fw_size +=
1070                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1071
1072                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1073                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1074                 info->fw = adev->gfx.mec_fw;
1075                 header = (const struct common_firmware_header *)info->fw->data;
1076                 adev->firmware.fw_size +=
1077                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1078
1079                 /* we need account JT in */
1080                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1081                 adev->firmware.fw_size +=
1082                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1083
1084                 if (amdgpu_sriov_vf(adev)) {
1085                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1086                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1087                         info->fw = adev->gfx.mec_fw;
1088                         adev->firmware.fw_size +=
1089                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1090                 }
1091
1092                 if (adev->gfx.mec2_fw) {
1093                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1094                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1095                         info->fw = adev->gfx.mec2_fw;
1096                         header = (const struct common_firmware_header *)info->fw->data;
1097                         adev->firmware.fw_size +=
1098                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1099                 }
1100
1101         }
1102
1103 out:
1104         if (err) {
1105                 dev_err(adev->dev,
1106                         "gfx8: Failed to load firmware \"%s\"\n",
1107                         fw_name);
1108                 release_firmware(adev->gfx.pfp_fw);
1109                 adev->gfx.pfp_fw = NULL;
1110                 release_firmware(adev->gfx.me_fw);
1111                 adev->gfx.me_fw = NULL;
1112                 release_firmware(adev->gfx.ce_fw);
1113                 adev->gfx.ce_fw = NULL;
1114                 release_firmware(adev->gfx.rlc_fw);
1115                 adev->gfx.rlc_fw = NULL;
1116                 release_firmware(adev->gfx.mec_fw);
1117                 adev->gfx.mec_fw = NULL;
1118                 release_firmware(adev->gfx.mec2_fw);
1119                 adev->gfx.mec2_fw = NULL;
1120         }
1121         return err;
1122 }
1123
/*
 * gfx_v8_0_get_csb_buffer - serialize the clear-state buffer (CSB) into
 * @buffer as little-endian PACKET3 commands: preamble begin, context
 * control, every SECT_CONTEXT extent from adev->gfx.rlc.cs_data, the
 * raster config pair, preamble end, and a final CLEAR_STATE.
 *
 * Silently returns if cs_data or @buffer is NULL.  Packet order is part
 * of the hardware contract -- do not reorder.  The caller must size
 * @buffer via gfx_v8_0_get_csb_size().
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* emit each context-register extent; any non-context section
	 * aborts the dump (only SECT_CONTEXT is expected here) */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* raster config values captured from SE0/SH0 */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1170
1171 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1172 {
1173         const __le32 *fw_data;
1174         volatile u32 *dst_ptr;
1175         int me, i, max_me = 4;
1176         u32 bo_offset = 0;
1177         u32 table_offset, table_size;
1178
1179         if (adev->asic_type == CHIP_CARRIZO)
1180                 max_me = 5;
1181
1182         /* write the cp table buffer */
1183         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1184         for (me = 0; me < max_me; me++) {
1185                 if (me == 0) {
1186                         const struct gfx_firmware_header_v1_0 *hdr =
1187                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1188                         fw_data = (const __le32 *)
1189                                 (adev->gfx.ce_fw->data +
1190                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1191                         table_offset = le32_to_cpu(hdr->jt_offset);
1192                         table_size = le32_to_cpu(hdr->jt_size);
1193                 } else if (me == 1) {
1194                         const struct gfx_firmware_header_v1_0 *hdr =
1195                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1196                         fw_data = (const __le32 *)
1197                                 (adev->gfx.pfp_fw->data +
1198                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1199                         table_offset = le32_to_cpu(hdr->jt_offset);
1200                         table_size = le32_to_cpu(hdr->jt_size);
1201                 } else if (me == 2) {
1202                         const struct gfx_firmware_header_v1_0 *hdr =
1203                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1204                         fw_data = (const __le32 *)
1205                                 (adev->gfx.me_fw->data +
1206                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1207                         table_offset = le32_to_cpu(hdr->jt_offset);
1208                         table_size = le32_to_cpu(hdr->jt_size);
1209                 } else if (me == 3) {
1210                         const struct gfx_firmware_header_v1_0 *hdr =
1211                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1212                         fw_data = (const __le32 *)
1213                                 (adev->gfx.mec_fw->data +
1214                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1215                         table_offset = le32_to_cpu(hdr->jt_offset);
1216                         table_size = le32_to_cpu(hdr->jt_size);
1217                 } else  if (me == 4) {
1218                         const struct gfx_firmware_header_v1_0 *hdr =
1219                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1220                         fw_data = (const __le32 *)
1221                                 (adev->gfx.mec2_fw->data +
1222                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1223                         table_offset = le32_to_cpu(hdr->jt_offset);
1224                         table_size = le32_to_cpu(hdr->jt_size);
1225                 }
1226
1227                 for (i = 0; i < table_size; i ++) {
1228                         dst_ptr[bo_offset + i] =
1229                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1230                 }
1231
1232                 bo_offset += table_size;
1233         }
1234 }
1235
1236 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1237 {
1238         int r;
1239
1240         /* clear state block */
1241         if (adev->gfx.rlc.clear_state_obj) {
1242                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1243                 if (unlikely(r != 0))
1244                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1245                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1246                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1247                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1248                 adev->gfx.rlc.clear_state_obj = NULL;
1249         }
1250
1251         /* jump table block */
1252         if (adev->gfx.rlc.cp_table_obj) {
1253                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1254                 if (unlikely(r != 0))
1255                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1256                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1257                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1258                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1259                 adev->gfx.rlc.cp_table_obj = NULL;
1260         }
1261 }
1262
1263 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1264 {
1265         volatile u32 *dst_ptr;
1266         u32 dws;
1267         const struct cs_section_def *cs_data;
1268         int r;
1269
1270         adev->gfx.rlc.cs_data = vi_cs_data;
1271
1272         cs_data = adev->gfx.rlc.cs_data;
1273
1274         if (cs_data) {
1275                 /* clear state block */
1276                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1277
1278                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1279                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1280                                              AMDGPU_GEM_DOMAIN_VRAM,
1281                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1282                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1283                                              NULL, NULL,
1284                                              &adev->gfx.rlc.clear_state_obj);
1285                         if (r) {
1286                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1287                                 gfx_v8_0_rlc_fini(adev);
1288                                 return r;
1289                         }
1290                 }
1291                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1292                 if (unlikely(r != 0)) {
1293                         gfx_v8_0_rlc_fini(adev);
1294                         return r;
1295                 }
1296                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1297                                   &adev->gfx.rlc.clear_state_gpu_addr);
1298                 if (r) {
1299                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1300                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1301                         gfx_v8_0_rlc_fini(adev);
1302                         return r;
1303                 }
1304
1305                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1306                 if (r) {
1307                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1308                         gfx_v8_0_rlc_fini(adev);
1309                         return r;
1310                 }
1311                 /* set up the cs buffer */
1312                 dst_ptr = adev->gfx.rlc.cs_ptr;
1313                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1314                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1315                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1316         }
1317
1318         if ((adev->asic_type == CHIP_CARRIZO) ||
1319             (adev->asic_type == CHIP_STONEY)) {
1320                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1321                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1322                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1323                                              AMDGPU_GEM_DOMAIN_VRAM,
1324                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1325                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1326                                              NULL, NULL,
1327                                              &adev->gfx.rlc.cp_table_obj);
1328                         if (r) {
1329                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1330                                 return r;
1331                         }
1332                 }
1333
1334                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1335                 if (unlikely(r != 0)) {
1336                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1337                         return r;
1338                 }
1339                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1340                                   &adev->gfx.rlc.cp_table_gpu_addr);
1341                 if (r) {
1342                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1343                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1344                         return r;
1345                 }
1346                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1347                 if (r) {
1348                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1349                         return r;
1350                 }
1351
1352                 cz_init_cp_jump_table(adev);
1353
1354                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1355                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1356         }
1357
1358         return 0;
1359 }
1360
1361 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1362 {
1363         int r;
1364
1365         if (adev->gfx.mec.hpd_eop_obj) {
1366                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1367                 if (unlikely(r != 0))
1368                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1369                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1370                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1371                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1372                 adev->gfx.mec.hpd_eop_obj = NULL;
1373         }
1374 }
1375
1376 static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1377                                   struct amdgpu_ring *ring,
1378                                   struct amdgpu_irq_src *irq)
1379 {
1380         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1381         int r = 0;
1382
1383         r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
1384         if (r)
1385                 return r;
1386
1387         ring->adev = NULL;
1388         ring->ring_obj = NULL;
1389         ring->use_doorbell = true;
1390         ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
1391         if (adev->gfx.mec2_fw) {
1392                 ring->me = 2;
1393                 ring->pipe = 0;
1394         } else {
1395                 ring->me = 1;
1396                 ring->pipe = 1;
1397         }
1398
1399         ring->queue = 0;
1400         ring->eop_gpu_addr = kiq->eop_gpu_addr;
1401         sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1402         r = amdgpu_ring_init(adev, ring, 1024,
1403                              irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1404         if (r)
1405                 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
1406
1407         return r;
1408 }
1409 static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
1410                                    struct amdgpu_irq_src *irq)
1411 {
1412         amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
1413         amdgpu_ring_fini(ring);
1414 }
1415
1416 #define MEC_HPD_SIZE 2048
1417
1418 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1419 {
1420         int r;
1421         u32 *hpd;
1422
1423         /*
1424          * we assign only 1 pipe because all other pipes will
1425          * be handled by KFD
1426          */
1427         adev->gfx.mec.num_mec = 1;
1428         adev->gfx.mec.num_pipe = 1;
1429         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1430
1431         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1432                 r = amdgpu_bo_create(adev,
1433                                      adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1434                                      PAGE_SIZE, true,
1435                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1436                                      &adev->gfx.mec.hpd_eop_obj);
1437                 if (r) {
1438                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1439                         return r;
1440                 }
1441         }
1442
1443         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1444         if (unlikely(r != 0)) {
1445                 gfx_v8_0_mec_fini(adev);
1446                 return r;
1447         }
1448         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1449                           &adev->gfx.mec.hpd_eop_gpu_addr);
1450         if (r) {
1451                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1452                 gfx_v8_0_mec_fini(adev);
1453                 return r;
1454         }
1455         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1456         if (r) {
1457                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1458                 gfx_v8_0_mec_fini(adev);
1459                 return r;
1460         }
1461
1462         memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1463
1464         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1465         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1466
1467         return 0;
1468 }
1469
1470 static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
1471 {
1472         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1473
1474         amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
1475 }
1476
1477 static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
1478 {
1479         int r;
1480         u32 *hpd;
1481         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1482
1483         r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
1484                                     AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
1485                                     &kiq->eop_gpu_addr, (void **)&hpd);
1486         if (r) {
1487                 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
1488                 return r;
1489         }
1490
1491         memset(hpd, 0, MEC_HPD_SIZE);
1492
1493         r = amdgpu_bo_reserve(kiq->eop_obj, false);
1494         if (unlikely(r != 0))
1495                 dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
1496         amdgpu_bo_kunmap(kiq->eop_obj);
1497         amdgpu_bo_unreserve(kiq->eop_obj);
1498
1499         return 0;
1500 }
1501
/*
 * Raw GCN machine code for the VGPR-initialization compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds().  The name and the repeating opcode
 * pattern suggest it writes every VGPR before EDC counting is enabled —
 * NOTE(review): verify against a disassembly if this table is touched.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1538
/*
 * Raw GCN machine code for the SGPR-initialization compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() (dispatched twice, with different
 * static-thread-mgmt masks — see sgpr1_init_regs/sgpr2_init_regs).
 * NOTE(review): semantics inferred from the name; verify via disassembly
 * before modifying.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1563
/*
 * Register/value pairs written via PACKET3_SET_SH_REG before the VGPR
 * scrub dispatch in gfx_v8_0_do_edc_gpr_workarounds().  All shader
 * engines are enabled (SE0 mask 0xffffffff).
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1583
/*
 * Register/value pairs for the first SGPR scrub dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); differs from sgpr2_init_regs only in
 * the static-thread-mgmt SE0 mask (0x0f here, i.e. the lower CU group).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1603
/*
 * Register/value pairs for the second SGPR scrub dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); same as sgpr1_init_regs except the
 * SE0 mask selects the other CU group (0xf0).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1623
/*
 * EDC SEC/DED error counter registers; gfx_v8_0_do_edc_gpr_workarounds()
 * reads each one back once after the scrub dispatches to clear it.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1652
/**
 * gfx_v8_0_do_edc_gpr_workarounds - scrub the GPRs before enabling EDC
 * @adev: amdgpu device pointer
 *
 * Carrizo-only workaround: builds a single indirect buffer containing
 * three compute dispatches (one VGPR pass, two SGPR passes with different
 * COMPUTE_STATIC_THREAD_MGMT_SE0 masks), submits it on compute ring 0 and
 * waits for completion.  Afterwards DED/FED reporting is enabled in
 * GB_EDC_MODE and every EDC counter register is read once to clear it.
 *
 * Returns 0 on success (or when the workaround is not applicable),
 * negative error code on submission/fence failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save the current EDC mode and disable it while scrubbing */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/*
	 * IB size per pass, in dwords: 3 per SET_SH_REG reg/value pair,
	 * + 4 for the PGM_LO/HI write, + 5 for DISPATCH_DIRECT,
	 * + 2 for the EVENT_WRITE flush; x4 converts dwords to bytes.
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	/* the shader binaries live in the same bo, after the packets */
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 — same shader, other CU group (see sgpr2_init_regs) */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the IB on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED halt and FED propagation on */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	/* NOTE(review): the trailing "| 1" looks intentional but is
	 * undocumented here — confirm against the register spec before
	 * changing. */
	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1815
1816 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1817 {
1818         u32 gb_addr_config;
1819         u32 mc_shared_chmap, mc_arb_ramcfg;
1820         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1821         u32 tmp;
1822         int ret;
1823
1824         switch (adev->asic_type) {
1825         case CHIP_TOPAZ:
1826                 adev->gfx.config.max_shader_engines = 1;
1827                 adev->gfx.config.max_tile_pipes = 2;
1828                 adev->gfx.config.max_cu_per_sh = 6;
1829                 adev->gfx.config.max_sh_per_se = 1;
1830                 adev->gfx.config.max_backends_per_se = 2;
1831                 adev->gfx.config.max_texture_channel_caches = 2;
1832                 adev->gfx.config.max_gprs = 256;
1833                 adev->gfx.config.max_gs_threads = 32;
1834                 adev->gfx.config.max_hw_contexts = 8;
1835
1836                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1837                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1838                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1839                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1840                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1841                 break;
1842         case CHIP_FIJI:
1843                 adev->gfx.config.max_shader_engines = 4;
1844                 adev->gfx.config.max_tile_pipes = 16;
1845                 adev->gfx.config.max_cu_per_sh = 16;
1846                 adev->gfx.config.max_sh_per_se = 1;
1847                 adev->gfx.config.max_backends_per_se = 4;
1848                 adev->gfx.config.max_texture_channel_caches = 16;
1849                 adev->gfx.config.max_gprs = 256;
1850                 adev->gfx.config.max_gs_threads = 32;
1851                 adev->gfx.config.max_hw_contexts = 8;
1852
1853                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1854                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1855                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1856                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1857                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1858                 break;
1859         case CHIP_POLARIS11:
1860         case CHIP_POLARIS12:
1861                 ret = amdgpu_atombios_get_gfx_info(adev);
1862                 if (ret)
1863                         return ret;
1864                 adev->gfx.config.max_gprs = 256;
1865                 adev->gfx.config.max_gs_threads = 32;
1866                 adev->gfx.config.max_hw_contexts = 8;
1867
1868                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1869                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1870                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1871                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1872                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1873                 break;
1874         case CHIP_POLARIS10:
1875                 ret = amdgpu_atombios_get_gfx_info(adev);
1876                 if (ret)
1877                         return ret;
1878                 adev->gfx.config.max_gprs = 256;
1879                 adev->gfx.config.max_gs_threads = 32;
1880                 adev->gfx.config.max_hw_contexts = 8;
1881
1882                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1883                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1884                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1885                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1886                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1887                 break;
1888         case CHIP_TONGA:
1889                 adev->gfx.config.max_shader_engines = 4;
1890                 adev->gfx.config.max_tile_pipes = 8;
1891                 adev->gfx.config.max_cu_per_sh = 8;
1892                 adev->gfx.config.max_sh_per_se = 1;
1893                 adev->gfx.config.max_backends_per_se = 2;
1894                 adev->gfx.config.max_texture_channel_caches = 8;
1895                 adev->gfx.config.max_gprs = 256;
1896                 adev->gfx.config.max_gs_threads = 32;
1897                 adev->gfx.config.max_hw_contexts = 8;
1898
1899                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1900                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1901                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1902                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1903                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1904                 break;
1905         case CHIP_CARRIZO:
1906                 adev->gfx.config.max_shader_engines = 1;
1907                 adev->gfx.config.max_tile_pipes = 2;
1908                 adev->gfx.config.max_sh_per_se = 1;
1909                 adev->gfx.config.max_backends_per_se = 2;
1910
1911                 switch (adev->pdev->revision) {
1912                 case 0xc4:
1913                 case 0x84:
1914                 case 0xc8:
1915                 case 0xcc:
1916                 case 0xe1:
1917                 case 0xe3:
1918                         /* B10 */
1919                         adev->gfx.config.max_cu_per_sh = 8;
1920                         break;
1921                 case 0xc5:
1922                 case 0x81:
1923                 case 0x85:
1924                 case 0xc9:
1925                 case 0xcd:
1926                 case 0xe2:
1927                 case 0xe4:
1928                         /* B8 */
1929                         adev->gfx.config.max_cu_per_sh = 6;
1930                         break;
1931                 case 0xc6:
1932                 case 0xca:
1933                 case 0xce:
1934                 case 0x88:
1935                         /* B6 */
1936                         adev->gfx.config.max_cu_per_sh = 6;
1937                         break;
1938                 case 0xc7:
1939                 case 0x87:
1940                 case 0xcb:
1941                 case 0xe5:
1942                 case 0x89:
1943                 default:
1944                         /* B4 */
1945                         adev->gfx.config.max_cu_per_sh = 4;
1946                         break;
1947                 }
1948
1949                 adev->gfx.config.max_texture_channel_caches = 2;
1950                 adev->gfx.config.max_gprs = 256;
1951                 adev->gfx.config.max_gs_threads = 32;
1952                 adev->gfx.config.max_hw_contexts = 8;
1953
1954                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1955                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1956                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1957                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1958                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1959                 break;
1960         case CHIP_STONEY:
1961                 adev->gfx.config.max_shader_engines = 1;
1962                 adev->gfx.config.max_tile_pipes = 2;
1963                 adev->gfx.config.max_sh_per_se = 1;
1964                 adev->gfx.config.max_backends_per_se = 1;
1965
1966                 switch (adev->pdev->revision) {
1967                 case 0xc0:
1968                 case 0xc1:
1969                 case 0xc2:
1970                 case 0xc4:
1971                 case 0xc8:
1972                 case 0xc9:
1973                         adev->gfx.config.max_cu_per_sh = 3;
1974                         break;
1975                 case 0xd0:
1976                 case 0xd1:
1977                 case 0xd2:
1978                 default:
1979                         adev->gfx.config.max_cu_per_sh = 2;
1980                         break;
1981                 }
1982
1983                 adev->gfx.config.max_texture_channel_caches = 2;
1984                 adev->gfx.config.max_gprs = 256;
1985                 adev->gfx.config.max_gs_threads = 16;
1986                 adev->gfx.config.max_hw_contexts = 8;
1987
1988                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1989                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1990                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1991                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1992                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1993                 break;
1994         default:
1995                 adev->gfx.config.max_shader_engines = 2;
1996                 adev->gfx.config.max_tile_pipes = 4;
1997                 adev->gfx.config.max_cu_per_sh = 2;
1998                 adev->gfx.config.max_sh_per_se = 1;
1999                 adev->gfx.config.max_backends_per_se = 2;
2000                 adev->gfx.config.max_texture_channel_caches = 4;
2001                 adev->gfx.config.max_gprs = 256;
2002                 adev->gfx.config.max_gs_threads = 32;
2003                 adev->gfx.config.max_hw_contexts = 8;
2004
2005                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2006                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2007                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2008                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2009                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2010                 break;
2011         }
2012
2013         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2014         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2015         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2016
2017         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2018         adev->gfx.config.mem_max_burst_length_bytes = 256;
2019         if (adev->flags & AMD_IS_APU) {
2020                 /* Get memory bank mapping mode. */
2021                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2022                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2023                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2024
2025                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2026                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2027                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2028
2029                 /* Validate settings in case only one DIMM installed. */
2030                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2031                         dimm00_addr_map = 0;
2032                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2033                         dimm01_addr_map = 0;
2034                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2035                         dimm10_addr_map = 0;
2036                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2037                         dimm11_addr_map = 0;
2038
2039                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2040                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
2041                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2042                         adev->gfx.config.mem_row_size_in_kb = 2;
2043                 else
2044                         adev->gfx.config.mem_row_size_in_kb = 1;
2045         } else {
2046                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2047                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2048                 if (adev->gfx.config.mem_row_size_in_kb > 4)
2049                         adev->gfx.config.mem_row_size_in_kb = 4;
2050         }
2051
2052         adev->gfx.config.shader_engine_tile_size = 32;
2053         adev->gfx.config.num_gpus = 1;
2054         adev->gfx.config.multi_gpu_tile_size = 64;
2055
2056         /* fix up row size */
2057         switch (adev->gfx.config.mem_row_size_in_kb) {
2058         case 1:
2059         default:
2060                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2061                 break;
2062         case 2:
2063                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2064                 break;
2065         case 4:
2066                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2067                 break;
2068         }
2069         adev->gfx.config.gb_addr_config = gb_addr_config;
2070
2071         return 0;
2072 }
2073
/**
 * gfx_v8_0_sw_init - software-side initialization for the GFX v8 IP block
 * @handle: amdgpu_device pointer (passed as void * by the IP block framework)
 *
 * Registers the GFX interrupt sources, loads the gfx microcode, allocates
 * the RLC and MEC buffer objects (plus KIQ/MQD state under SR-IOV),
 * initializes the gfx and compute rings, and reserves the GDS/GWS/OA
 * partitions used by gfx.
 *
 * Returns 0 on success or a negative error code on failure.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* KIQ event (legacy IH source id 178) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event (legacy IH source id 181) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg (legacy IH source id 184) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst (legacy IH source id 185) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		/* 8 queues per pipe: ring i maps to pipe i/8, queue i%8 */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		/* each queue gets its own HPD EOP slot inside the MEC BO */
		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		/* one EOP interrupt source per pipe, shared by its queues */
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     irq_type);
		if (r)
			return r;
	}

	if (amdgpu_sriov_vf(adev)) {
		r = gfx_v8_0_kiq_init(adev);
		if (r) {
			DRM_ERROR("Failed to init KIQ BOs!\n");
			return r;
		}

		kiq = &adev->gfx.kiq;
		r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
		if (r)
			return r;

		/* create MQD for all compute queues as well as KIQ for SRIOV case */
		r = gfx_v8_0_compute_mqd_sw_init(adev);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	/* CE ram size; 0x8000 presumably fixed for all gfx8 parts — TODO confirm */
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2213
/**
 * gfx_v8_0_sw_fini - software-side teardown for the GFX v8 IP block
 * @handle: amdgpu_device pointer (passed as void * by the IP block framework)
 *
 * Releases what gfx_v8_0_sw_init() allocated: the GDS/GWS/OA buffer
 * objects, the gfx and compute rings, the SR-IOV KIQ/MQD state, and
 * finally the MEC/RLC buffers and loaded microcode.
 *
 * Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free the GDS/GWS/OA reservations made in sw_init */
	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	/* KIQ ring and per-queue MQDs only exist when running as an SR-IOV VF */
	if (amdgpu_sriov_vf(adev)) {
		gfx_v8_0_compute_mqd_sw_fini(adev);
		gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
		gfx_v8_0_kiq_fini(adev);
	}

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2240
2241 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2242 {
2243         uint32_t *modearray, *mod2array;
2244         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2245         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2246         u32 reg_offset;
2247
2248         modearray = adev->gfx.config.tile_mode_array;
2249         mod2array = adev->gfx.config.macrotile_mode_array;
2250
2251         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2252                 modearray[reg_offset] = 0;
2253
2254         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2255                 mod2array[reg_offset] = 0;
2256
2257         switch (adev->asic_type) {
2258         case CHIP_TOPAZ:
2259                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2260                                 PIPE_CONFIG(ADDR_SURF_P2) |
2261                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2262                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2263                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2264                                 PIPE_CONFIG(ADDR_SURF_P2) |
2265                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2266                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2267                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2268                                 PIPE_CONFIG(ADDR_SURF_P2) |
2269                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2270                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2271                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2272                                 PIPE_CONFIG(ADDR_SURF_P2) |
2273                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2274                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2275                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2276                                 PIPE_CONFIG(ADDR_SURF_P2) |
2277                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2278                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2279                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2280                                 PIPE_CONFIG(ADDR_SURF_P2) |
2281                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2282                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2283                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2284                                 PIPE_CONFIG(ADDR_SURF_P2) |
2285                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2286                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2287                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2288                                 PIPE_CONFIG(ADDR_SURF_P2));
2289                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2290                                 PIPE_CONFIG(ADDR_SURF_P2) |
2291                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2292                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2293                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294                                  PIPE_CONFIG(ADDR_SURF_P2) |
2295                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2296                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2297                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2298                                  PIPE_CONFIG(ADDR_SURF_P2) |
2299                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2300                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2301                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2302                                  PIPE_CONFIG(ADDR_SURF_P2) |
2303                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2304                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2305                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306                                  PIPE_CONFIG(ADDR_SURF_P2) |
2307                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2309                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2310                                  PIPE_CONFIG(ADDR_SURF_P2) |
2311                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2312                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2313                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2314                                  PIPE_CONFIG(ADDR_SURF_P2) |
2315                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2316                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2317                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2318                                  PIPE_CONFIG(ADDR_SURF_P2) |
2319                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2320                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2321                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2322                                  PIPE_CONFIG(ADDR_SURF_P2) |
2323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2325                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2326                                  PIPE_CONFIG(ADDR_SURF_P2) |
2327                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2328                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2329                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2330                                  PIPE_CONFIG(ADDR_SURF_P2) |
2331                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2332                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2333                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2334                                  PIPE_CONFIG(ADDR_SURF_P2) |
2335                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2336                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2337                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2338                                  PIPE_CONFIG(ADDR_SURF_P2) |
2339                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2340                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2341                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2342                                  PIPE_CONFIG(ADDR_SURF_P2) |
2343                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2344                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2345                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2346                                  PIPE_CONFIG(ADDR_SURF_P2) |
2347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2349                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2350                                  PIPE_CONFIG(ADDR_SURF_P2) |
2351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2353                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2354                                  PIPE_CONFIG(ADDR_SURF_P2) |
2355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2357                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2358                                  PIPE_CONFIG(ADDR_SURF_P2) |
2359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2361
2362                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2363                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2364                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2365                                 NUM_BANKS(ADDR_SURF_8_BANK));
2366                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2367                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2368                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2369                                 NUM_BANKS(ADDR_SURF_8_BANK));
2370                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2371                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2372                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2373                                 NUM_BANKS(ADDR_SURF_8_BANK));
2374                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2376                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2377                                 NUM_BANKS(ADDR_SURF_8_BANK));
2378                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2380                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381                                 NUM_BANKS(ADDR_SURF_8_BANK));
2382                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2383                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2384                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2385                                 NUM_BANKS(ADDR_SURF_8_BANK));
2386                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2388                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2389                                 NUM_BANKS(ADDR_SURF_8_BANK));
2390                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2391                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2392                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2393                                 NUM_BANKS(ADDR_SURF_16_BANK));
2394                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2395                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2396                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2397                                 NUM_BANKS(ADDR_SURF_16_BANK));
2398                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2399                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2400                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2401                                  NUM_BANKS(ADDR_SURF_16_BANK));
2402                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2403                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2404                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2405                                  NUM_BANKS(ADDR_SURF_16_BANK));
2406                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2407                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2408                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2409                                  NUM_BANKS(ADDR_SURF_16_BANK));
2410                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2411                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2412                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2413                                  NUM_BANKS(ADDR_SURF_16_BANK));
2414                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2416                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2417                                  NUM_BANKS(ADDR_SURF_8_BANK));
2418
2419                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2420                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2421                             reg_offset != 23)
2422                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2423
2424                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2425                         if (reg_offset != 7)
2426                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2427
2428                 break;
2429         case CHIP_FIJI:
2430                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2433                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2434                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2435                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2437                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2438                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2440                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2441                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2442                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2444                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2445                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2446                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2449                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2450                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2451                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2453                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2454                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2455                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2457                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2458                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2460                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2461                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2462                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2463                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2464                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2465                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2467                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2470                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2471                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2473                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2474                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2475                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2476                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2477                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2478                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2479                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2480                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2481                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2482                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2483                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2484                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2487                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2488                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2489                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2491                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2493                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2494                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2495                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2496                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2497                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2498                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2499                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2500                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2501                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2502                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2503                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2504                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2505                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2506                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2507                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2508                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2509                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2510                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2511                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2512                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2513                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2514                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2515                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2516                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2517                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2518                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2519                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2520                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2521                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2522                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2523                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2524                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2525                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2526                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2527                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2528                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2529                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2530                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2531                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2532                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2533                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2534                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2535                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2536                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2537                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2538                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2539                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2542                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2543                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2544                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2545                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2546                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2547                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2548                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2549                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2550                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2551                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2552
2553                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2555                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2556                                 NUM_BANKS(ADDR_SURF_8_BANK));
2557                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2559                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2560                                 NUM_BANKS(ADDR_SURF_8_BANK));
2561                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2563                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2564                                 NUM_BANKS(ADDR_SURF_8_BANK));
2565                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2567                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2568                                 NUM_BANKS(ADDR_SURF_8_BANK));
2569                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2571                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2572                                 NUM_BANKS(ADDR_SURF_8_BANK));
2573                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2575                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2576                                 NUM_BANKS(ADDR_SURF_8_BANK));
2577                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2579                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2580                                 NUM_BANKS(ADDR_SURF_8_BANK));
2581                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2583                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2584                                 NUM_BANKS(ADDR_SURF_8_BANK));
2585                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2587                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2588                                 NUM_BANKS(ADDR_SURF_8_BANK));
2589                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2591                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2592                                  NUM_BANKS(ADDR_SURF_8_BANK));
2593                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2595                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2596                                  NUM_BANKS(ADDR_SURF_8_BANK));
2597                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2599                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2600                                  NUM_BANKS(ADDR_SURF_8_BANK));
2601                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2603                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2604                                  NUM_BANKS(ADDR_SURF_8_BANK));
2605                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2607                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2608                                  NUM_BANKS(ADDR_SURF_4_BANK));
2609
2610                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2611                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2612
2613                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2614                         if (reg_offset != 7)
2615                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2616
2617                 break;
2618         case CHIP_TONGA:
2619                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2622                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2623                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2626                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2627                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2630                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2631                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2632                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2633                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2634                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2635                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2636                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2637                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2638                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2639                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2642                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2643                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2644                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2645                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2646                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2647                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2648                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2649                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2650                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2651                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2652                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2653                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2654                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2656                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2659                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2660                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2662                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2663                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2664                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2665                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2666                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2668                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2669                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2670                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2672                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2673                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2675                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2676                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2677                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2678                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2680                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2681                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2682                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2683                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2684                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2685                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2686                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2688                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2689                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2690                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2692                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2693                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2694                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2695                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2696                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2697                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2698                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2699                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2700                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2701                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2702                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2703                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2704                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2705                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2706                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2707                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2708                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2709                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2710                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2712                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2713                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2714                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2715                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2716                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2717                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2718                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2719                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2720                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2721                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2722                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2723                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2724                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2725                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2726                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2727                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2728                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2729                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2730                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2731                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2732                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2733                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2734                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2735                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2736                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2737                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2738                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2741
2742                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2744                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2745                                 NUM_BANKS(ADDR_SURF_16_BANK));
2746                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2748                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2749                                 NUM_BANKS(ADDR_SURF_16_BANK));
2750                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2752                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753                                 NUM_BANKS(ADDR_SURF_16_BANK));
2754                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2755                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2756                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2757                                 NUM_BANKS(ADDR_SURF_16_BANK));
2758                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2760                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2761                                 NUM_BANKS(ADDR_SURF_16_BANK));
2762                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2765                                 NUM_BANKS(ADDR_SURF_16_BANK));
2766                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2768                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2769                                 NUM_BANKS(ADDR_SURF_16_BANK));
2770                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2772                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2773                                 NUM_BANKS(ADDR_SURF_16_BANK));
2774                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2776                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2777                                 NUM_BANKS(ADDR_SURF_16_BANK));
2778                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2779                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2780                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2781                                  NUM_BANKS(ADDR_SURF_16_BANK));
2782                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2783                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2784                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2785                                  NUM_BANKS(ADDR_SURF_16_BANK));
2786                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2789                                  NUM_BANKS(ADDR_SURF_8_BANK));
2790                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2792                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2793                                  NUM_BANKS(ADDR_SURF_4_BANK));
2794                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2796                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2797                                  NUM_BANKS(ADDR_SURF_4_BANK));
2798
2799                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2800                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2801
2802                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2803                         if (reg_offset != 7)
2804                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2805
2806                 break;
2807         case CHIP_POLARIS11:
2808         case CHIP_POLARIS12:
2809                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2810                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2812                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2813                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2814                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2816                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2817                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2818                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2820                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2821                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2824                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2825                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2827                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2828                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2829                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2830                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2832                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2833                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2834                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2835                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2836                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2837                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2838                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2840                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2841                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2842                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2843                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2844                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2846                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2848                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2850                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2852                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2854                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2855                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2856                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2858                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2859                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2862                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2864                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2866                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2868                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2869                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2870                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2871                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2872                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2874                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2875                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2876                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2877                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2878                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2879                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2880                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2882                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2883                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2884                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2885                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2886                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2887                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2888                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2890                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2891                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2892                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2893                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2894                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2895                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2896                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2898                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2899                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2900                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2902                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2903                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2904                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2905                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2906                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2907                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2908                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2909                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2910                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2911                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2912                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2914                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2915                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2916                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2918                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2919                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2921                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2922                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2925                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2926                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2927                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2928                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2929                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2930                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2931
2932                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2934                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2935                                 NUM_BANKS(ADDR_SURF_16_BANK));
2936
2937                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2938                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2939                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2940                                 NUM_BANKS(ADDR_SURF_16_BANK));
2941
2942                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2943                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2944                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2945                                 NUM_BANKS(ADDR_SURF_16_BANK));
2946
2947                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2948                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2949                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2950                                 NUM_BANKS(ADDR_SURF_16_BANK));
2951
2952                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2953                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2954                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2955                                 NUM_BANKS(ADDR_SURF_16_BANK));
2956
2957                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2958                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2959                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2960                                 NUM_BANKS(ADDR_SURF_16_BANK));
2961
2962                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2963                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2964                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2965                                 NUM_BANKS(ADDR_SURF_16_BANK));
2966
2967                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2968                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2969                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970                                 NUM_BANKS(ADDR_SURF_16_BANK));
2971
2972                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2973                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2974                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975                                 NUM_BANKS(ADDR_SURF_16_BANK));
2976
2977                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2978                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2979                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2980                                 NUM_BANKS(ADDR_SURF_16_BANK));
2981
2982                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2983                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2984                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2985                                 NUM_BANKS(ADDR_SURF_16_BANK));
2986
2987                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2990                                 NUM_BANKS(ADDR_SURF_16_BANK));
2991
2992                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2994                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2995                                 NUM_BANKS(ADDR_SURF_8_BANK));
2996
2997                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2999                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3000                                 NUM_BANKS(ADDR_SURF_4_BANK));
3001
3002                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3003                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3004
3005                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3006                         if (reg_offset != 7)
3007                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3008
3009                 break;
3010         case CHIP_POLARIS10:
3011                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3014                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3015                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3016                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3018                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3019                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3020                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3021                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3022                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3023                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3024                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3025                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3026                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3027                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3028                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3029                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3030                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3031                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3032                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3033                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3034                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3035                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3036                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3037                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3038                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3039                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3040                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3041                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3042                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3043                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3044                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3045                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3046                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3047                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3048                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3049                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3050                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3051                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3052                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3053                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3054                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3055                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3056                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3057                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3058                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3059                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3060                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3061                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3062                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3063                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3064                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3065                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3066                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3068                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3070                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3072                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3073                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3074                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3075                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3076                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3077                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3078                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3079                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3080                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3081                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3082                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3083                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3084                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3085                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3086                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3087                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3088                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3089                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3090                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3091                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3092                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3093                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3094                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3095                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3096                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3097                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3098                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3099                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3100                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3101                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3102                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3103                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3104                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3105                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3106                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3107                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3108                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3109                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3110                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3111                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3112                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3113                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3114                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3115                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3116                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3117                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3118                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3119                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3120                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3121                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3123                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3124                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3125                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3126                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3127                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3128                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3129                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3130                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3131                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3132                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3133
3134                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3135                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3136                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3137                                 NUM_BANKS(ADDR_SURF_16_BANK));
3138
3139                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3140                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3141                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3142                                 NUM_BANKS(ADDR_SURF_16_BANK));
3143
3144                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3145                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3146                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3147                                 NUM_BANKS(ADDR_SURF_16_BANK));
3148
3149                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3150                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3151                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3152                                 NUM_BANKS(ADDR_SURF_16_BANK));
3153
3154                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3155                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3156                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3157                                 NUM_BANKS(ADDR_SURF_16_BANK));
3158
3159                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3162                                 NUM_BANKS(ADDR_SURF_16_BANK));
3163
3164                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3165                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3166                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3167                                 NUM_BANKS(ADDR_SURF_16_BANK));
3168
3169                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3170                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3171                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3172                                 NUM_BANKS(ADDR_SURF_16_BANK));
3173
3174                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3175                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3176                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3177                                 NUM_BANKS(ADDR_SURF_16_BANK));
3178
3179                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3180                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3181                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3182                                 NUM_BANKS(ADDR_SURF_16_BANK));
3183
3184                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187                                 NUM_BANKS(ADDR_SURF_16_BANK));
3188
3189                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3190                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3191                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3192                                 NUM_BANKS(ADDR_SURF_8_BANK));
3193
3194                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3195                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3196                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3197                                 NUM_BANKS(ADDR_SURF_4_BANK));
3198
3199                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3200                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3201                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3202                                 NUM_BANKS(ADDR_SURF_4_BANK));
3203
3204                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3205                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3206
3207                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3208                         if (reg_offset != 7)
3209                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3210
3211                 break;
3212         case CHIP_STONEY:
3213                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3214                                 PIPE_CONFIG(ADDR_SURF_P2) |
3215                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3216                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3217                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3218                                 PIPE_CONFIG(ADDR_SURF_P2) |
3219                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3220                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3221                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3222                                 PIPE_CONFIG(ADDR_SURF_P2) |
3223                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3224                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3225                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3226                                 PIPE_CONFIG(ADDR_SURF_P2) |
3227                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3228                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3229                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3230                                 PIPE_CONFIG(ADDR_SURF_P2) |
3231                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3232                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3233                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3234                                 PIPE_CONFIG(ADDR_SURF_P2) |
3235                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3236                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3237                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238                                 PIPE_CONFIG(ADDR_SURF_P2) |
3239                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3240                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3241                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3242                                 PIPE_CONFIG(ADDR_SURF_P2));
3243                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3244                                 PIPE_CONFIG(ADDR_SURF_P2) |
3245                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3246                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3248                                  PIPE_CONFIG(ADDR_SURF_P2) |
3249                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3250                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3251                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3252                                  PIPE_CONFIG(ADDR_SURF_P2) |
3253                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3254                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3255                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3256                                  PIPE_CONFIG(ADDR_SURF_P2) |
3257                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3258                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3259                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3260                                  PIPE_CONFIG(ADDR_SURF_P2) |
3261                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3263                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3264                                  PIPE_CONFIG(ADDR_SURF_P2) |
3265                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3266                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3267                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3268                                  PIPE_CONFIG(ADDR_SURF_P2) |
3269                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3270                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3271                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3272                                  PIPE_CONFIG(ADDR_SURF_P2) |
3273                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3274                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3275                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3276                                  PIPE_CONFIG(ADDR_SURF_P2) |
3277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3279                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3280                                  PIPE_CONFIG(ADDR_SURF_P2) |
3281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3283                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3284                                  PIPE_CONFIG(ADDR_SURF_P2) |
3285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3287                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3288                                  PIPE_CONFIG(ADDR_SURF_P2) |
3289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3291                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3292                                  PIPE_CONFIG(ADDR_SURF_P2) |
3293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3295                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3296                                  PIPE_CONFIG(ADDR_SURF_P2) |
3297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3299                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3300                                  PIPE_CONFIG(ADDR_SURF_P2) |
3301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3303                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3304                                  PIPE_CONFIG(ADDR_SURF_P2) |
3305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3307                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3308                                  PIPE_CONFIG(ADDR_SURF_P2) |
3309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3311                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3312                                  PIPE_CONFIG(ADDR_SURF_P2) |
3313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3315
3316                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3318                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319                                 NUM_BANKS(ADDR_SURF_8_BANK));
3320                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3321                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3322                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3323                                 NUM_BANKS(ADDR_SURF_8_BANK));
3324                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327                                 NUM_BANKS(ADDR_SURF_8_BANK));
3328                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3331                                 NUM_BANKS(ADDR_SURF_8_BANK));
3332                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3333                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3334                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3335                                 NUM_BANKS(ADDR_SURF_8_BANK));
3336                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3337                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3338                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3339                                 NUM_BANKS(ADDR_SURF_8_BANK));
3340                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343                                 NUM_BANKS(ADDR_SURF_8_BANK));
3344                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3345                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3346                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3347                                 NUM_BANKS(ADDR_SURF_16_BANK));
3348                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3349                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3350                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3351                                 NUM_BANKS(ADDR_SURF_16_BANK));
3352                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3353                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355                                  NUM_BANKS(ADDR_SURF_16_BANK));
3356                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3357                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3358                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3359                                  NUM_BANKS(ADDR_SURF_16_BANK));
3360                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3361                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3362                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3363                                  NUM_BANKS(ADDR_SURF_16_BANK));
3364                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3365                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3366                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367                                  NUM_BANKS(ADDR_SURF_16_BANK));
3368                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3370                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3371                                  NUM_BANKS(ADDR_SURF_8_BANK));
3372
3373                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3374                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3375                             reg_offset != 23)
3376                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3377
3378                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3379                         if (reg_offset != 7)
3380                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3381
3382                 break;
3383         default:
3384                 dev_warn(adev->dev,
3385                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3386                          adev->asic_type);
3387                 /* fall through -- unknown ASICs use the CHIP_CARRIZO tiling tables */
3388         case CHIP_CARRIZO:
3389                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3390                                 PIPE_CONFIG(ADDR_SURF_P2) |
3391                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3392                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3393                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3394                                 PIPE_CONFIG(ADDR_SURF_P2) |
3395                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3396                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3397                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3398                                 PIPE_CONFIG(ADDR_SURF_P2) |
3399                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3400                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3401                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3402                                 PIPE_CONFIG(ADDR_SURF_P2) |
3403                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3404                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3405                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3406                                 PIPE_CONFIG(ADDR_SURF_P2) |
3407                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3408                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3409                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3410                                 PIPE_CONFIG(ADDR_SURF_P2) |
3411                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3412                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3413                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3414                                 PIPE_CONFIG(ADDR_SURF_P2) |
3415                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3416                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3417                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3418                                 PIPE_CONFIG(ADDR_SURF_P2));
3419                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3420                                 PIPE_CONFIG(ADDR_SURF_P2) |
3421                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3422                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3423                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3424                                  PIPE_CONFIG(ADDR_SURF_P2) |
3425                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3426                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3427                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3428                                  PIPE_CONFIG(ADDR_SURF_P2) |
3429                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3430                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3431                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3432                                  PIPE_CONFIG(ADDR_SURF_P2) |
3433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3435                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3436                                  PIPE_CONFIG(ADDR_SURF_P2) |
3437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3439                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3440                                  PIPE_CONFIG(ADDR_SURF_P2) |
3441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3443                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3444                                  PIPE_CONFIG(ADDR_SURF_P2) |
3445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3447                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3448                                  PIPE_CONFIG(ADDR_SURF_P2) |
3449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3451                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3452                                  PIPE_CONFIG(ADDR_SURF_P2) |
3453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3455                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3456                                  PIPE_CONFIG(ADDR_SURF_P2) |
3457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3459                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3460                                  PIPE_CONFIG(ADDR_SURF_P2) |
3461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3463                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3464                                  PIPE_CONFIG(ADDR_SURF_P2) |
3465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3467                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3468                                  PIPE_CONFIG(ADDR_SURF_P2) |
3469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3471                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3472                                  PIPE_CONFIG(ADDR_SURF_P2) |
3473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3475                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3476                                  PIPE_CONFIG(ADDR_SURF_P2) |
3477                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3478                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3479                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3480                                  PIPE_CONFIG(ADDR_SURF_P2) |
3481                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3482                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3483                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3484                                  PIPE_CONFIG(ADDR_SURF_P2) |
3485                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3486                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3487                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3488                                  PIPE_CONFIG(ADDR_SURF_P2) |
3489                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3490                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3491
3492                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3493                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3494                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3495                                 NUM_BANKS(ADDR_SURF_8_BANK));
3496                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3497                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3498                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3499                                 NUM_BANKS(ADDR_SURF_8_BANK));
3500                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3501                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3502                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3503                                 NUM_BANKS(ADDR_SURF_8_BANK));
3504                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3505                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3506                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3507                                 NUM_BANKS(ADDR_SURF_8_BANK));
3508                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3509                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3510                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3511                                 NUM_BANKS(ADDR_SURF_8_BANK));
3512                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3513                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3514                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3515                                 NUM_BANKS(ADDR_SURF_8_BANK));
3516                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3517                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3518                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3519                                 NUM_BANKS(ADDR_SURF_8_BANK));
3520                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3521                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3522                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3523                                 NUM_BANKS(ADDR_SURF_16_BANK));
3524                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3525                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3526                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3527                                 NUM_BANKS(ADDR_SURF_16_BANK));
3528                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3529                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3530                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3531                                  NUM_BANKS(ADDR_SURF_16_BANK));
3532                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3533                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3534                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3535                                  NUM_BANKS(ADDR_SURF_16_BANK));
3536                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3537                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3538                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3539                                  NUM_BANKS(ADDR_SURF_16_BANK));
3540                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3541                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3542                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3543                                  NUM_BANKS(ADDR_SURF_16_BANK));
3544                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3545                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3546                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3547                                  NUM_BANKS(ADDR_SURF_8_BANK));
3548
3549                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3550                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3551                             reg_offset != 23)
3552                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3553
3554                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3555                         if (reg_offset != 7)
3556                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3557
3558                 break;
3559         }
3560 }
3561
3562 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3563                                   u32 se_num, u32 sh_num, u32 instance)
3564 {
3565         u32 data;
3566
3567         if (instance == 0xffffffff)
3568                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3569         else
3570                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3571
3572         if (se_num == 0xffffffff)
3573                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3574         else
3575                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3576
3577         if (sh_num == 0xffffffff)
3578                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3579         else
3580                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3581
3582         WREG32(mmGRBM_GFX_INDEX, data);
3583 }
3584
3585 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3586 {
3587         return (u32)((1ULL << bit_width) - 1);
3588 }
3589
3590 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3591 {
3592         u32 data, mask;
3593
3594         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3595                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3596
3597         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3598
3599         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3600                                        adev->gfx.config.max_sh_per_se);
3601
3602         return (~data) & mask;
3603 }
3604
/*
 * gfx_v8_0_raster_config - get the default raster config for an ASIC
 *
 * @adev: amdgpu_device pointer
 * @rconf: ORed with the PA_SC_RASTER_CONFIG value for this chip
 * @rconf1: ORed with the PA_SC_RASTER_CONFIG_1 value for this chip
 *
 * The per-ASIC constants select the default RB/packer/SE mapping for
 * each chip's backend topology; they must match the hardware exactly.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* single SE pair, no RASTER_CONFIG_1 programming needed */
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		/* single RB - hardware defaults are fine */
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3644
/*
 * gfx_v8_0_write_harvested_raster_configs - program raster config with
 * harvested (fuse-disabled) render backends taken into account
 *
 * @adev: amdgpu_device pointer
 * @raster_config: default PA_SC_RASTER_CONFIG value for the chip
 * @raster_config_1: default PA_SC_RASTER_CONFIG_1 value for the chip
 * @rb_mask: bitmap of the RBs that are actually enabled
 * @num_rb: number of RB slots the mask covers
 *
 * When some RBs are harvested, the default SE/PKR/RB mapping would route
 * work to dead backends.  Rebuild the mapping fields per shader engine so
 * that each level (SE pair, SE, packer, RB) points at a live backend, and
 * write the result with GRBM_GFX_INDEX selecting one SE at a time.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* slice rb_mask into one sub-mask per shader engine */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if a whole SE pair is dead, remap SE_PAIR_MAP to the live pair */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;	/* index of this SE's pair partner base */

		/* one SE of the pair is dead: point SE_MAP at the live one */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* one packer of this SE is dead: remap PKR_MAP */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* remap the RB pair inside packer 0 if one RB is dead */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			/* same for the RB pair inside packer 1 */
			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3753
/*
 * gfx_v8_0_setup_rb - detect active render backends and program raster config
 *
 * @adev: amdgpu_device pointer
 *
 * Walks every SE/SH to collect the active-RB bitmap, programs
 * PA_SC_RASTER_CONFIG(_1) (using the harvested path if some RBs are
 * disabled), and caches the per-SE/SH register values for userspace
 * queries.  Holds grbm_idx_mutex for the whole GRBM_GFX_INDEX dance.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* gather each SH's active-RB bits into one packed bitmap */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* no harvesting (or nothing enabled at all): broadcast the defaults */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3810
/**
 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
3819 #define DEFAULT_SH_MEM_BASES    (0x6000)
3820 #define FIRST_COMPUTE_VMID      (8)
3821 #define LAST_COMPUTE_VMID       (16)
3822 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3823 {
3824         int i;
3825         uint32_t sh_mem_config;
3826         uint32_t sh_mem_bases;
3827
3828         /*
3829          * Configure apertures:
3830          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3831          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3832          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3833          */
3834         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3835
3836         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3837                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3838                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3839                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3840                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3841                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3842
3843         mutex_lock(&adev->srbm_mutex);
3844         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3845                 vi_srbm_select(adev, 0, 0, 0, i);
3846                 /* CP and shaders */
3847                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3848                 WREG32(mmSH_MEM_APE1_BASE, 1);
3849                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3850                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3851         }
3852         vi_srbm_select(adev, 0, 0, 0, 0);
3853         mutex_unlock(&adev->srbm_mutex);
3854 }
3855
3856 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3857 {
3858         switch (adev->asic_type) {
3859         default:
3860                 adev->gfx.config.double_offchip_lds_buf = 1;
3861                 break;
3862         case CHIP_CARRIZO:
3863         case CHIP_STONEY:
3864                 adev->gfx.config.double_offchip_lds_buf = 0;
3865                 break;
3866         }
3867 }
3868
/*
 * gfx_v8_0_gpu_init - one-time GFX block bring-up
 *
 * @adev: amdgpu_device pointer
 *
 * Programs global address config and tiling tables, detects RBs/CUs,
 * sets per-VMID SH_MEM apertures (VMID 0 gets GPUVM/UC, the rest get
 * the shared 64-bit aperture layout), initializes the compute VMIDs,
 * then broadcasts the SC FIFO sizes and SPI arbiter priorities.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: kernel/GART accesses, uncached default */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* user VMIDs: non-coherent default, shared aperture base */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* BASE > LIMIT disables the APE1 aperture for this VMID */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3949
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for RLC serdes masters to go idle
 *
 * @adev: amdgpu_device pointer
 *
 * Polls the per-CU serdes busy register for every SE/SH (selected via
 * GRBM_GFX_INDEX), then the non-CU masters.  Each poll gives up after
 * adev->usec_timeout microseconds; a timeout is not reported to the
 * caller.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* now wait for the SE/GC/TC non-CU masters as well */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3979
3980 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3981                                                bool enable)
3982 {
3983         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3984
3985         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3986         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3987         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3988         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3989
3990         WREG32(mmCP_INT_CNTL_RING0, tmp);
3991 }
3992
3993 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3994 {
3995         /* csib */
3996         WREG32(mmRLC_CSIB_ADDR_HI,
3997                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3998         WREG32(mmRLC_CSIB_ADDR_LO,
3999                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
4000         WREG32(mmRLC_CSIB_LENGTH,
4001                         adev->gfx.rlc.clear_state_size);
4002 }
4003
/*
 * gfx_v8_0_parse_ind_reg_list - parse and rewrite an RLC register list
 *
 * @register_list_format: the list; rewritten in place (each entry's third
 *	word is replaced by its position in @unique_indices)
 * @ind_offset: offset in the list to start parsing from
 * @list_size: number of words in the list
 * @unique_indices: out - table of distinct third-word values found
 * @indices_count: in/out - entries used in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: out - start offset of each 0xFFFFFFFF-terminated run
 * @offset_count: in/out - entries used in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * The list is a sequence of runs terminated by 0xFFFFFFFF markers; as
 * observed here, each record inside a run is read at +0 and +2 (the loop
 * increment plus the explicit +=2 steps by three words per record).
 * NOTE(review): the BUG_ON checks fire after the counters are
 * incremented, so filling a table exactly to capacity trips them even
 * though the last write was in bounds.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		/* record where each new run starts */
		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* 0xFFFFFFFF terminates the current run */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip to the record's third word, the index value */
		ind_offset += 2;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append it to the unique table */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value with its table position */
		register_list_format[ind_offset] = indices;
	}
}
4053
/*
 * Build and upload the RLC save/restore lists used for GFX power gating.
 *
 * A scratch copy of the firmware's register_list_format is parsed by
 * gfx_v8_0_parse_ind_reg_list() (which rewrites it in place), then the
 * direct restore list is streamed into SRM ARAM, the indirect list and
 * per-list starting offsets into the GPM scratch area, and the unique
 * indirect indices into the SRM index control register pairs.
 *
 * Returns 0 on success, -ENOMEM if the scratch copy cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	/* room for 8 unique indirect indices / 10 sub-list start offsets */
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* scratch copy: the parser rewrites index entries in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	/* stream the direct restore list into SRM ARAM from offset 0 */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size is programmed as half the dword count — presumably the
	 * RLC counts (reg, value) pairs; confirm against RLC firmware docs */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			/* low 18 bits -> ADDR register, bits 20+ -> DATA
			 * register.  NOTE(review): bits 18-19 are dropped by
			 * this split — confirm intended encoding */
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4117
/* Turn on the RLC save/restore machine (SRM) used by power gating. */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4122
4123 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4124 {
4125         uint32_t data;
4126
4127         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4128
4129         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4130         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4131         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4132         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4133         WREG32(mmRLC_PG_DELAY, data);
4134
4135         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4136         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4137
4138 }
4139
/* CZ/ST: toggle SMU clock slow-down while a block powers up. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4145
/* CZ/ST: toggle SMU clock slow-down while a block powers down. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4151
/*
 * CZ/ST: enable/disable CP power gating.  The hardware field is a
 * *disable* bit, hence the inverted enable ? 0 : 1 mapping.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4156
4157 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4158 {
4159         if ((adev->asic_type == CHIP_CARRIZO) ||
4160             (adev->asic_type == CHIP_STONEY)) {
4161                 gfx_v8_0_init_csb(adev);
4162                 gfx_v8_0_init_save_restore_list(adev);
4163                 gfx_v8_0_enable_save_restore_machine(adev);
4164                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4165                 gfx_v8_0_init_power_gating(adev);
4166                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4167         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4168                    (adev->asic_type == CHIP_POLARIS12)) {
4169                 gfx_v8_0_init_csb(adev);
4170                 gfx_v8_0_init_save_restore_list(adev);
4171                 gfx_v8_0_enable_save_restore_machine(adev);
4172                 gfx_v8_0_init_power_gating(adev);
4173         }
4174
4175 }
4176
/*
 * Halt the RLC: stop its F32 core, mask the GUI idle interrupts, then
 * wait for outstanding RLC serdes transactions to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4184
/* Pulse the RLC soft-reset bit, with a 50us settle time on each edge. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4193
/*
 * Start the RLC F32 core.  On dGPUs the GUI idle interrupt is unmasked
 * here; APUs defer that until after the CP is initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4204
4205 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4206 {
4207         const struct rlc_firmware_header_v2_0 *hdr;
4208         const __le32 *fw_data;
4209         unsigned i, fw_size;
4210
4211         if (!adev->gfx.rlc_fw)
4212                 return -EINVAL;
4213
4214         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4215         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4216
4217         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4218                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4219         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4220
4221         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4222         for (i = 0; i < fw_size; i++)
4223                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4224         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4225
4226         return 0;
4227 }
4228
/*
 * (Re)start the RLC: stop it, disable clock gating and power gating,
 * soft-reset it, re-initialize the PG state, load (or verify) the RLC
 * microcode, then start it again.  The ordering of these steps matters.
 *
 * Returns 0 on success or a negative error code from microcode loading.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		/* Polaris also has 3D CGCG/CGLS enables in the low 2 bits */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the firmware; just verify it finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4273
4274 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4275 {
4276         int i;
4277         u32 tmp = RREG32(mmCP_ME_CNTL);
4278
4279         if (enable) {
4280                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4281                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4282                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4283         } else {
4284                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4285                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4286                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4287                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4288                         adev->gfx.gfx_ring[i].ready = false;
4289         }
4290         WREG32(mmCP_ME_CNTL, tmp);
4291         udelay(50);
4292 }
4293
4294 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4295 {
4296         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4297         const struct gfx_firmware_header_v1_0 *ce_hdr;
4298         const struct gfx_firmware_header_v1_0 *me_hdr;
4299         const __le32 *fw_data;
4300         unsigned i, fw_size;
4301
4302         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4303                 return -EINVAL;
4304
4305         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4306                 adev->gfx.pfp_fw->data;
4307         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4308                 adev->gfx.ce_fw->data;
4309         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4310                 adev->gfx.me_fw->data;
4311
4312         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4313         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4314         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4315
4316         gfx_v8_0_cp_gfx_enable(adev, false);
4317
4318         /* PFP */
4319         fw_data = (const __le32 *)
4320                 (adev->gfx.pfp_fw->data +
4321                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4322         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4323         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4324         for (i = 0; i < fw_size; i++)
4325                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4326         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4327
4328         /* CE */
4329         fw_data = (const __le32 *)
4330                 (adev->gfx.ce_fw->data +
4331                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4332         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4333         WREG32(mmCP_CE_UCODE_ADDR, 0);
4334         for (i = 0; i < fw_size; i++)
4335                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4336         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4337
4338         /* ME */
4339         fw_data = (const __le32 *)
4340                 (adev->gfx.me_fw->data +
4341                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4342         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4343         WREG32(mmCP_ME_RAM_WADDR, 0);
4344         for (i = 0; i < fw_size; i++)
4345                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4346         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4347
4348         return 0;
4349 }
4350
4351 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4352 {
4353         u32 count = 0;
4354         const struct cs_section_def *sect = NULL;
4355         const struct cs_extent_def *ext = NULL;
4356
4357         /* begin clear state */
4358         count += 2;
4359         /* context control state */
4360         count += 3;
4361
4362         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4363                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4364                         if (sect->id == SECT_CONTEXT)
4365                                 count += 2 + ext->reg_count;
4366                         else
4367                                 return 0;
4368                 }
4369         }
4370         /* pa_sc_raster_config/pa_sc_raster_config1 */
4371         count += 4;
4372         /* end clear state */
4373         count += 2;
4374         /* clear state */
4375         count += 2;
4376
4377         return count;
4378 }
4379
/*
 * Initialize the gfx CP and emit the clear-state sequence on gfx ring 0:
 * clear-state preamble, golden context registers from vi_cs_data,
 * per-ASIC PA_SC_RASTER_CONFIG values, CLEAR_STATE and the CE partition
 * setup.  Returns 0 on success or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* CSB size plus 4 dwords for the trailing SET_BASE packet */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	/* 0x80000000 in both CONTEXT_CONTROL payload dwords — presumably the
	 * load/shadow enable bits; confirm against the PM4 packet spec */
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every golden context-register extent */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC golden PA_SC_RASTER_CONFIG / _CONFIG1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* Topaz raster config depends on the render-backend count */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4472
/*
 * Program gfx ring 0's ring buffer (size, pointers, writeback and
 * doorbell setup), then start the ring via gfx_v8_0_cp_gfx_start() and
 * run a ring test.  Returns the ring test result (0 on success).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers; the
	 * RPTR_WR_ENA bit is set only transiently to allow the reset */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* NOTE(review): 1ms settle before clearing RPTR_WR_ENA — presumably
	 * required by the hardware; confirm against the programming guide */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* restrict the doorbell aperture to the gfx ring on Tonga */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4557
4558 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4559 {
4560         int i;
4561
4562         if (enable) {
4563                 WREG32(mmCP_MEC_CNTL, 0);
4564         } else {
4565                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4566                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4567                         adev->gfx.compute_ring[i].ready = false;
4568                 adev->gfx.kiq.ring.ready = false;
4569         }
4570         udelay(50);
4571 }
4572
4573 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4574 {
4575         const struct gfx_firmware_header_v1_0 *mec_hdr;
4576         const __le32 *fw_data;
4577         unsigned i, fw_size;
4578
4579         if (!adev->gfx.mec_fw)
4580                 return -EINVAL;
4581
4582         gfx_v8_0_cp_compute_enable(adev, false);
4583
4584         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4585         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4586
4587         fw_data = (const __le32 *)
4588                 (adev->gfx.mec_fw->data +
4589                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4590         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4591
4592         /* MEC1 */
4593         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4594         for (i = 0; i < fw_size; i++)
4595                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4596         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4597
4598         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4599         if (adev->gfx.mec2_fw) {
4600                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4601
4602                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4603                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4604
4605                 fw_data = (const __le32 *)
4606                         (adev->gfx.mec2_fw->data +
4607                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4608                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4609
4610                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4611                 for (i = 0; i < fw_size; i++)
4612                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4613                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4614         }
4615
4616         return 0;
4617 }
4618
4619 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4620 {
4621         int i, r;
4622
4623         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4624                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4625
4626                 if (ring->mqd_obj) {
4627                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4628                         if (unlikely(r != 0))
4629                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4630
4631                         amdgpu_bo_unpin(ring->mqd_obj);
4632                         amdgpu_bo_unreserve(ring->mqd_obj);
4633
4634                         amdgpu_bo_unref(&ring->mqd_obj);
4635                         ring->mqd_obj = NULL;
4636                         ring->mqd_ptr = NULL;
4637                         ring->mqd_gpu_addr = 0;
4638                 }
4639         }
4640 }
4641
/* KIQ functions */
/*
 * Tell the RLC which hardware queue is the KIQ by encoding me/pipe/queue
 * into the low byte of RLC_CP_SCHEDULERS.  NOTE(review): the register is
 * written twice — first without, then with bit 7 (0x80) set — which looks
 * like a required two-step activation sequence; confirm against RLC docs.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4656
4657 static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
4658 {
4659         amdgpu_ring_alloc(ring, 8);
4660         /* set resources */
4661         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4662         amdgpu_ring_write(ring, 0);     /* vmid_mask:0 queue_type:0 (KIQ) */
4663         amdgpu_ring_write(ring, 0x000000FF);    /* queue mask lo */
4664         amdgpu_ring_write(ring, 0);     /* queue mask hi */
4665         amdgpu_ring_write(ring, 0);     /* gws mask lo */
4666         amdgpu_ring_write(ring, 0);     /* gws mask hi */
4667         amdgpu_ring_write(ring, 0);     /* oac mask */
4668         amdgpu_ring_write(ring, 0);     /* gds heap base:0, gds heap size:0 */
4669         amdgpu_ring_commit(ring);
4670         udelay(50);
4671 }
4672
/*
 * Ask the KIQ to map one compute queue: emit a MAP_QUEUES packet that
 * carries the queue's doorbell, queue/pipe/me selector, MQD address and
 * wptr writeback address.
 */
static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr, wptr_addr;

	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	/* NOTE(review): amdgpu_ring_alloc() result is ignored here; on
	 * failure the writes below would hit an unallocated ring */
	amdgpu_ring_alloc(kiq_ring, 8);

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, 0x21010000);
	/* doorbell offset plus queue/pipe selectors; bit 31 presumably
	 * distinguishes MEC1 from MEC2 — confirm against the packet spec */
	amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
			(ring->queue << 26) |
			(ring->pipe << 29) |
			((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	amdgpu_ring_commit(kiq_ring);
	udelay(50);
}
4697
4698 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4699 {
4700         struct amdgpu_device *adev = ring->adev;
4701         struct vi_mqd *mqd = ring->mqd_ptr;
4702         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4703         uint32_t tmp;
4704
4705         mqd->header = 0xC0310800;
4706         mqd->compute_pipelinestat_enable = 0x00000001;
4707         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4708         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4709         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4710         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4711         mqd->compute_misc_reserved = 0x00000003;
4712
4713         eop_base_addr = ring->eop_gpu_addr >> 8;
4714         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4715         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4716
4717         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4718         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4719         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4720                         (order_base_2(MEC_HPD_SIZE / 4) - 1));
4721
4722         mqd->cp_hqd_eop_control = tmp;
4723
4724         /* enable doorbell? */
4725         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4726                             CP_HQD_PQ_DOORBELL_CONTROL,
4727                             DOORBELL_EN,
4728                             ring->use_doorbell ? 1 : 0);
4729
4730         mqd->cp_hqd_pq_doorbell_control = tmp;
4731
4732         /* disable the queue if it's active */
4733         mqd->cp_hqd_dequeue_request = 0;
4734         mqd->cp_hqd_pq_rptr = 0;
4735         mqd->cp_hqd_pq_wptr = 0;
4736
4737         /* set the pointer to the MQD */
4738         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4739         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4740
4741         /* set MQD vmid to 0 */
4742         tmp = RREG32(mmCP_MQD_CONTROL);
4743         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4744         mqd->cp_mqd_control = tmp;
4745
4746         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4747         hqd_gpu_addr = ring->gpu_addr >> 8;
4748         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4749         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4750
4751         /* set up the HQD, this is similar to CP_RB0_CNTL */
4752         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4753         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4754                             (order_base_2(ring->ring_size / 4) - 1));
4755         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4756                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4757 #ifdef __BIG_ENDIAN
4758         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4759 #endif
4760         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4761         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4762         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4763         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4764         mqd->cp_hqd_pq_control = tmp;
4765
4766         /* set the wb address whether it's enabled or not */
4767         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4768         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4769         mqd->cp_hqd_pq_rptr_report_addr_hi =
4770                 upper_32_bits(wb_gpu_addr) & 0xffff;
4771
4772         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4773         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4774         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4775         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4776
4777         tmp = 0;
4778         /* enable the doorbell if requested */
4779         if (ring->use_doorbell) {
4780                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4781                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4782                                 DOORBELL_OFFSET, ring->doorbell_index);
4783
4784                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4785                                          DOORBELL_EN, 1);
4786                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4787                                          DOORBELL_SOURCE, 0);
4788                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4789                                          DOORBELL_HIT, 0);
4790         }
4791
4792         mqd->cp_hqd_pq_doorbell_control = tmp;
4793
4794         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4795         ring->wptr = 0;
4796         mqd->cp_hqd_pq_wptr = ring->wptr;
4797         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4798
4799         /* set the vmid for the queue */
4800         mqd->cp_hqd_vmid = 0;
4801
4802         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4803         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4804         mqd->cp_hqd_persistent_state = tmp;
4805
4806         /* activate the queue */
4807         mqd->cp_hqd_active = 1;
4808
4809         return 0;
4810 }
4811
/*
 * gfx_v8_0_kiq_init_register() - program the per-queue HQD registers for
 * @ring from the values captured in its host-side MQD.
 *
 * The caller must hold adev->srbm_mutex and have selected this ring's
 * me/pipe/queue via vi_srbm_select(), because the CP_HQD_* registers are
 * aliased per queue (see gfx_v8_0_kiq_init_queue()).
 *
 * Always returns 0; the dequeue wait below times out silently.
 */
static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int j;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program the EOP buffer address */
	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active */
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
		/* busy-wait up to usec_timeout us for the HQD to drain */
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
	}

	/* set the pointer to the MQD */
	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
				mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
				mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		/* NOTE(review): the MEC doorbell aperture is only programmed
		 * on these APU/Fiji parts here, unlike the wider ASIC list in
		 * gfx_v8_0_cp_compute_resume() — confirm this is intentional.
		 */
		if ((adev->asic_type == CHIP_CARRIZO) ||
				(adev->asic_type == CHIP_FIJI) ||
				(adev->asic_type == CHIP_STONEY)) {
			WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
						AMDGPU_DOORBELL_KIQ << 2);
			WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
						AMDGPU_DOORBELL_MEC_RING7 << 2);
		}
	}
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);

	/* set the vmid for the queue */
	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}
4896
4897 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4898 {
4899         struct amdgpu_device *adev = ring->adev;
4900         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4901         struct vi_mqd *mqd = ring->mqd_ptr;
4902         bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
4903         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4904
4905         if (is_kiq) {
4906                 gfx_v8_0_kiq_setting(&kiq->ring);
4907         } else {
4908                 mqd_idx = ring - &adev->gfx.compute_ring[0];
4909         }
4910
4911         if (!adev->gfx.in_reset) {
4912                 memset((void *)mqd, 0, sizeof(*mqd));
4913                 mutex_lock(&adev->srbm_mutex);
4914                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4915                 gfx_v8_0_mqd_init(ring);
4916                 if (is_kiq)
4917                         gfx_v8_0_kiq_init_register(ring);
4918                 vi_srbm_select(adev, 0, 0, 0, 0);
4919                 mutex_unlock(&adev->srbm_mutex);
4920
4921                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4922                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4923         } else { /* for GPU_RESET case */
4924                 /* reset MQD to a clean status */
4925                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4926                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4927
4928                 /* reset ring buffer */
4929                 ring->wptr = 0;
4930                 amdgpu_ring_clear_ring(ring);
4931
4932                 if (is_kiq) {
4933                     mutex_lock(&adev->srbm_mutex);
4934                     vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4935                     gfx_v8_0_kiq_init_register(ring);
4936                     vi_srbm_select(adev, 0, 0, 0, 0);
4937                     mutex_unlock(&adev->srbm_mutex);
4938                 }
4939         }
4940
4941         if (is_kiq)
4942                 gfx_v8_0_kiq_enable(ring);
4943         else
4944                 gfx_v8_0_map_queue_enable(&kiq->ring, ring);
4945
4946         return 0;
4947 }
4948
4949 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4950 {
4951         struct amdgpu_ring *ring = NULL;
4952         int r = 0, i;
4953
4954         gfx_v8_0_cp_compute_enable(adev, true);
4955
4956         ring = &adev->gfx.kiq.ring;
4957
4958         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4959         if (unlikely(r != 0))
4960                 goto done;
4961
4962         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4963         if (!r) {
4964                 r = gfx_v8_0_kiq_init_queue(ring);
4965                 amdgpu_bo_kunmap(ring->mqd_obj);
4966                 ring->mqd_ptr = NULL;
4967         }
4968         amdgpu_bo_unreserve(ring->mqd_obj);
4969         if (r)
4970                 goto done;
4971
4972         ring->ready = true;
4973         r = amdgpu_ring_test_ring(ring);
4974         if (r) {
4975                 ring->ready = false;
4976                 goto done;
4977         }
4978
4979         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4980                 ring = &adev->gfx.compute_ring[i];
4981
4982                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4983                 if (unlikely(r != 0))
4984                         goto done;
4985                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4986                 if (!r) {
4987                         r = gfx_v8_0_kiq_init_queue(ring);
4988                         amdgpu_bo_kunmap(ring->mqd_obj);
4989                         ring->mqd_ptr = NULL;
4990                 }
4991                 amdgpu_bo_unreserve(ring->mqd_obj);
4992                 if (r)
4993                         goto done;
4994
4995                 ring->ready = true;
4996                 r = amdgpu_ring_test_ring(ring);
4997                 if (r)
4998                         ring->ready = false;
4999         }
5000
5001 done:
5002         return r;
5003 }
5004
/*
 * gfx_v8_0_cp_compute_resume() - legacy (non-KIQ) compute bring-up: for
 * every compute ring, allocate/pin/map its MQD BO, build the MQD in place
 * while mirroring each field into the live CP_HQD_* registers, then enable
 * the MEC and ring-test each queue.
 *
 * Ring-test failures only clear ring->ready; the function still returns 0.
 * Returns a negative error code only for BO create/reserve/pin/map
 * failures.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the queues.  */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* lazily create the MQD BO; it survives suspend/resume */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		/* NOTE(review): on kmap failure the BO is still reserved and
		 * pinned here — confirm gfx_v8_0_cp_compute_fini() copes.
		 */
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		/* MQD header/static fields; values match the packet format
		 * expected by the CP microcode (presumably — TODO confirm
		 * against the vi_structs.h / CP spec).
		 */
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* select this ring's me/pipe/queue so the CP_HQD_* registers
		 * below address its HQD
		 */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* per-queue EOP buffer, 256-byte aligned (hence >> 8) */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* read back the programmed EOP address into the MQD */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* busy-wait up to usec_timeout us for the drain */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10) ||
			    (adev->asic_type == CHIP_POLARIS12)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		/* enable GENERIC2 interrupts on ME1 pipe3 for these ASICs */
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10 ||
			adev->asic_type == CHIP_POLARIS12) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	/* globally enable doorbells after all queues are programmed */
	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* ring-test each compute queue; failures only mark it not ready */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
5254
5255 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5256 {
5257         int r;
5258
5259         if (!(adev->flags & AMD_IS_APU))
5260                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5261
5262         if (!adev->pp_enabled) {
5263                 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
5264                         /* legacy firmware loading */
5265                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
5266                         if (r)
5267                                 return r;
5268
5269                         r = gfx_v8_0_cp_compute_load_microcode(adev);
5270                         if (r)
5271                                 return r;
5272                 } else {
5273                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5274                                                         AMDGPU_UCODE_ID_CP_CE);
5275                         if (r)
5276                                 return -EINVAL;
5277
5278                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5279                                                         AMDGPU_UCODE_ID_CP_PFP);
5280                         if (r)
5281                                 return -EINVAL;
5282
5283                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5284                                                         AMDGPU_UCODE_ID_CP_ME);
5285                         if (r)
5286                                 return -EINVAL;
5287
5288                         if (adev->asic_type == CHIP_TOPAZ) {
5289                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5290                                 if (r)
5291                                         return r;
5292                         } else {
5293                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5294                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
5295                                 if (r)
5296                                         return -EINVAL;
5297                         }
5298                 }
5299         }
5300
5301         r = gfx_v8_0_cp_gfx_resume(adev);
5302         if (r)
5303                 return r;
5304
5305         if (amdgpu_sriov_vf(adev))
5306                 r = gfx_v8_0_kiq_resume(adev);
5307         else
5308                 r = gfx_v8_0_cp_compute_resume(adev);
5309         if (r)
5310                 return r;
5311
5312         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5313
5314         return 0;
5315 }
5316
/* Enable or disable both CP engines: gfx first, then compute. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5322
/*
 * gfx_v8_0_hw_init() - IP-block hw_init hook.
 *
 * Applies golden register settings, initializes the GPU block, then
 * resumes the RLC followed by the CP.  Returns 0 on success or the first
 * failing step's error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the RLC must be running before the CP can be resumed */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5339
/*
 * gfx_v8_0_hw_fini() - IP-block hw_fini hook: release gfx interrupts and
 * stop the CP and RLC.
 *
 * Under SR-IOV the host owns the hardware, so only the irq references are
 * dropped.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	/* stop the engines before tearing down the compute MQDs */
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	/* ungate power so the block is in a known state for re-init */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5359
/* IP-block suspend hook: suspending gfx is simply a full hw_fini. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
5366
/* IP-block resume hook: resuming gfx is simply a full hw_init. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
5373
5374 static bool gfx_v8_0_is_idle(void *handle)
5375 {
5376         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5377
5378         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5379                 return false;
5380         else
5381                 return true;
5382 }
5383
5384 static int gfx_v8_0_wait_for_idle(void *handle)
5385 {
5386         unsigned i;
5387         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5388
5389         for (i = 0; i < adev->usec_timeout; i++) {
5390                 if (gfx_v8_0_is_idle(handle))
5391                         return 0;
5392
5393                 udelay(1);
5394         }
5395         return -ETIMEDOUT;
5396 }
5397
5398 static bool gfx_v8_0_check_soft_reset(void *handle)
5399 {
5400         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5401         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5402         u32 tmp;
5403
5404         /* GRBM_STATUS */
5405         tmp = RREG32(mmGRBM_STATUS);
5406         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5407                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5408                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5409                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5410                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5411                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5412                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5413                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5414                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5415                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5416                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5417                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5418                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5419         }
5420
5421         /* GRBM_STATUS2 */
5422         tmp = RREG32(mmGRBM_STATUS2);
5423         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5424                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5425                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5426
5427         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5428             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5429             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5430                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5431                                                 SOFT_RESET_CPF, 1);
5432                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5433                                                 SOFT_RESET_CPC, 1);
5434                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5435                                                 SOFT_RESET_CPG, 1);
5436                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5437                                                 SOFT_RESET_GRBM, 1);
5438         }
5439
5440         /* SRBM_STATUS */
5441         tmp = RREG32(mmSRBM_STATUS);
5442         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5443                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5444                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5445         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5446                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5447                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5448
5449         if (grbm_soft_reset || srbm_soft_reset) {
5450                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5451                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5452                 return true;
5453         } else {
5454                 adev->gfx.grbm_soft_reset = 0;
5455                 adev->gfx.srbm_soft_reset = 0;
5456                 return false;
5457         }
5458 }
5459
/*
 * gfx_v8_0_inactive_hqd - make a compute ring's hardware queue descriptor idle
 *
 * Selects the ring's HQD slot (me/pipe/queue) via SRBM, and if the queue is
 * still active, issues a dequeue request and busy-waits (up to usec_timeout
 * microseconds) for the HQD to report inactive.  Used before disabling the
 * MEC during soft reset.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	/* SRBM selection is global state; serialize against other users */
	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		/* request dequeue (request type 2), then poll for idle */
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
	/* restore the default SRBM selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
5478
/*
 * gfx_v8_0_pre_soft_reset - quiesce the GFX block before a soft reset
 *
 * Based on the reset masks computed earlier (and cached in
 * adev->gfx.grbm_soft_reset / srbm_soft_reset), stops the RLC and disables
 * the front ends (CP for gfx, MEC for compute) that are about to be reset.
 * Returns 0 (no failure paths).
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	/* nothing was flagged for reset -> nothing to quiesce */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* drain every compute HQD before turning the MEC off */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_inactive_hqd(adev, ring);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
5516
/*
 * gfx_v8_0_soft_reset - perform the actual GRBM/SRBM soft reset
 *
 * Pulses the reset bits that were flagged by the check phase:
 * stalls the GFX memory controller interface, asserts the reset bits,
 * waits, deasserts them, then releases the stall.  Each write to a
 * *_SOFT_RESET register is followed by a read-back to post the write
 * before the delay.  Returns 0 (no failure paths).
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* nothing flagged for reset */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall GFX traffic and clear state in GMCON while resetting */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert the GRBM reset bits */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		/* deassert */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* assert the SRBM reset bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		/* deassert */
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall/clear */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5578
/*
 * gfx_v8_0_init_hqd - clear a compute ring's HQD state after reset
 *
 * Selects the ring's HQD slot via SRBM and zeroes the dequeue request and
 * the queue read/write pointers, so the queue restarts cleanly when the
 * MEC is resumed.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	/* restore the default SRBM selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
5590
/*
 * gfx_v8_0_post_soft_reset - bring the GFX block back up after a soft reset
 *
 * Mirror of gfx_v8_0_pre_soft_reset: for the blocks that were reset,
 * resumes the gfx CP and/or reinitializes each compute HQD and resumes the
 * MEC, then restarts the RLC.  Returns 0 (no failure paths).
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	/* nothing was reset -> nothing to resume */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* reset each queue's pointers before restarting the MEC */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_init_hqd(adev, ring);
		}
		gfx_v8_0_cp_compute_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5624
5625 /**
5626  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5627  *
5628  * @adev: amdgpu_device pointer
5629  *
5630  * Fetches a GPU clock counter snapshot.
5631  * Returns the 64 bit clock counter snapshot.
5632  */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* the capture write latches the 64-bit counter so that the two
	 * 32-bit halves read below are coherent; the mutex keeps another
	 * caller from re-latching between our reads */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
5644
/*
 * gfx_v8_0_ring_emit_gds_switch - emit GDS/GWS/OA allocation for a VMID
 *
 * Converts the byte-based base/size values into the hardware's block units
 * (via the AMDGPU_*_SHIFT constants) and emits one WRITE_DATA packet per
 * per-VMID GDS register (base, size, GWS, OA) on the given ring.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* scale byte quantities down to hardware allocation units */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base packed into one register */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5692
/*
 * wave_read_ind - read one SQ indirect register for a given simd/wave
 *
 * Programs SQ_IND_INDEX with the wave/simd/register address (FORCE_READ
 * set so the read is serviced even if the wave is running) and returns
 * the value from SQ_IND_DATA.
 */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}
5702
/*
 * wave_read_regs - bulk-read consecutive SQ indirect registers
 *
 * Like wave_read_ind() but with AUTO_INCR set, so each read of SQ_IND_DATA
 * advances the index automatically; reads @num registers starting at
 * @regno for the given simd/wave/thread into @out.
 */
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	/* each data read post-increments the hardware index */
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}
5717
/*
 * gfx_v8_0_read_wave_data - snapshot a wave's status registers for debugfs
 *
 * Fills @dst with the fixed "type 0" wave dump layout: a leading 0 marker
 * followed by the SQ wave registers in the order below.  The order defines
 * the dump format consumed by userspace tools, so it must not change.
 * @no_fields is advanced past the last field written.
 */
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
5741
5742 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5743                                      uint32_t wave, uint32_t start,
5744                                      uint32_t size, uint32_t *dst)
5745 {
5746         wave_read_regs(
5747                 adev, simd, wave, 0,
5748                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5749 }
5750
5751
/* GFX IP callbacks exported to the rest of the driver: clock-counter
 * snapshot, SE/SH selection, and debugfs wave-state readers. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5758
/*
 * gfx_v8_0_early_init - IP-block early init: set ring counts and hook up
 * the gfx/ring/irq/gds/rlc function tables.  Runs before any hardware
 * access.  Returns 0 (no failure paths).
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5773
/*
 * gfx_v8_0_late_init - IP-block late init
 *
 * Enables the privileged register/instruction fault interrupts, runs the
 * EDC GPR workaround (which needs the IB pool, hence "late"), and gates
 * GFX power.  Returns 0 on success or a negative errno from the first
 * failing step.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5797
5798 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5799                                                        bool enable)
5800 {
5801         if ((adev->asic_type == CHIP_POLARIS11) ||
5802             (adev->asic_type == CHIP_POLARIS12))
5803                 /* Send msg to SMU via Powerplay */
5804                 amdgpu_set_powergating_state(adev,
5805                                              AMD_IP_BLOCK_TYPE_SMC,
5806                                              enable ?
5807                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5808
5809         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5810 }
5811
5812 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5813                                                         bool enable)
5814 {
5815         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5816 }
5817
5818 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5819                 bool enable)
5820 {
5821         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5822 }
5823
5824 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5825                                           bool enable)
5826 {
5827         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5828 }
5829
5830 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5831                                                 bool enable)
5832 {
5833         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5834
5835         /* Read any GFX register to wake up GFX. */
5836         if (!enable)
5837                 RREG32(mmDB_RENDER_CONTROL);
5838 }
5839
5840 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5841                                           bool enable)
5842 {
5843         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5844                 cz_enable_gfx_cg_power_gating(adev, true);
5845                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5846                         cz_enable_gfx_pipeline_power_gating(adev, true);
5847         } else {
5848                 cz_enable_gfx_cg_power_gating(adev, false);
5849                 cz_enable_gfx_pipeline_power_gating(adev, false);
5850         }
5851 }
5852
/*
 * gfx_v8_0_set_powergating_state - IP-block powergating entry point
 *
 * Applies the power-gating features advertised in adev->pg_flags for the
 * current ASIC.  Skipped entirely under SR-IOV (the host owns PG).
 * Returns 0 (no failure paths).
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down tracks RLC_SMU_HS support unconditionally,
		 * independent of the requested gate/ungate state */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		/* static/dynamic MG PG engage only when both supported
		 * and gating was requested */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		/* other VI parts have no GFX PG handling here */
		break;
	}

	return 0;
}
5913
/*
 * gfx_v8_0_get_clockgating_state - report the currently-active GFX CG
 * features by reading back the hardware enable/override bits and OR-ing
 * the corresponding AMD_CG_SUPPORT_* flags into @flags.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* under SR-IOV start from a clean slate; registers are still read
	 * below -- NOTE(review): confirm register access is safe for VFs */
	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG: override bit clear means MGCG active */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS: RLC memory light sleep implies MGLS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS: CP memory light sleep implies MGLS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5955
/*
 * gfx_v8_0_send_serdes_cmd - issue a BPM serdes command to all CUs
 *
 * Broadcasts to every SE/SH, selects all CU and non-CU serdes masters,
 * then builds RLC_SERDES_WR_CTRL: clears the command/select/format bits
 * (Stoney keeps its BPM_DATA/REG_ADDR fields, other ASICs clear those
 * too) and programs the command, target register address, and an
 * all-ones BPM address before writing it out.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to all shader engines / shader arrays */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5996
/* RLC safe-mode handshake messages and RLC_GPR_REG2 field layout.
 * NOTE(review): none of these are referenced in the visible portion of this
 * file (the iceland_* safe-mode helpers below use RLC_SAFE_MODE instead) --
 * verify against the rest of the file before assuming they are live. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
6003
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode before touching CG state
 *
 * No-op if the RLC F32 core is not running or neither CGCG nor MGCG is
 * enabled.  Otherwise writes the safe-mode request (CMD set, message 1)
 * into RLC_SAFE_MODE, waits for GFX clock and power status to report on,
 * then waits for the RLC to acknowledge by clearing CMD, and records the
 * state in adev->gfx.rlc.in_safe_mode.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CMD=1, MESSAGE=1: request safe-mode entry */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clocks and power to come up */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack the request by clearing CMD */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
6037
/*
 * iceland_exit_rlc_safe_mode - release RLC safe mode
 *
 * Counterpart of iceland_enter_rlc_safe_mode(): if we previously entered
 * safe mode, write the exit request (CMD set, message 0) and clear the
 * in_safe_mode flag, then wait for the RLC to acknowledge by clearing CMD.
 * No-op if the RLC F32 core is not running.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD=1, MESSAGE=0: request safe-mode exit */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to clear CMD (runs even when no exit request
	 * was sent above -- it then just reads CMD as already clear) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
6062
/* RLC safe-mode entry/exit callbacks used by the CG update paths below. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
6067
/*
 * gfx_v8_0_update_medium_grain_clock_gating - enable/disable MGCG (+MGLS/CGTS)
 *
 * Runs the documented MGCG programming sequence under RLC safe mode.
 * The numbered steps in the comments reflect the required hardware
 * ordering; each register is only rewritten when its value changed.
 * The feature set applied depends on adev->cg_flags (MGCG, MGLS,
 * RLC_LS, CP_LS, CGTS, CGTS_LS).
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear the override bits so
		 * MGCG can engage (APUs keep GRBM overridden) */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6171
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse grain clock gating, false to disable
 *
 * Programs RLC_CGCG_CGLS_CTRL, the MGCG override register and the RLC
 * serdes to enable or disable coarse grain clock gating (CGCG) and
 * coarse grain light sleep (CGLS), honouring the
 * AMD_CG_SUPPORT_GFX_CGCG/CGLS bits in adev->cg_flags.  The whole
 * sequence runs with the RLC in safe mode; registers are only written
 * back when their value actually changed.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override bit so the RLC may gate clocks */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* CGLS also needs its override bit cleared */
			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG: force both CGCG and CGLS overrides on */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6262 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6263                                             bool enable)
6264 {
6265         if (enable) {
6266                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6267                  * ===  MGCG + MGLS + TS(CG/LS) ===
6268                  */
6269                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6270                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6271         } else {
6272                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6273                  * ===  CGCG + CGLS ===
6274                  */
6275                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6276                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6277         }
6278         return 0;
6279 }
6280
6281 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6282                                           enum amd_clockgating_state state)
6283 {
6284         uint32_t msg_id, pp_state = 0;
6285         uint32_t pp_support_state = 0;
6286         void *pp_handle = adev->powerplay.pp_handle;
6287
6288         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6289                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6290                         pp_support_state = PP_STATE_SUPPORT_LS;
6291                         pp_state = PP_STATE_LS;
6292                 }
6293                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6294                         pp_support_state |= PP_STATE_SUPPORT_CG;
6295                         pp_state |= PP_STATE_CG;
6296                 }
6297                 if (state == AMD_CG_STATE_UNGATE)
6298                         pp_state = 0;
6299
6300                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6301                                 PP_BLOCK_GFX_CG,
6302                                 pp_support_state,
6303                                 pp_state);
6304                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6305         }
6306
6307         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6308                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6309                         pp_support_state = PP_STATE_SUPPORT_LS;
6310                         pp_state = PP_STATE_LS;
6311                 }
6312
6313                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6314                         pp_support_state |= PP_STATE_SUPPORT_CG;
6315                         pp_state |= PP_STATE_CG;
6316                 }
6317
6318                 if (state == AMD_CG_STATE_UNGATE)
6319                         pp_state = 0;
6320
6321                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6322                                 PP_BLOCK_GFX_MG,
6323                                 pp_support_state,
6324                                 pp_state);
6325                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6326         }
6327
6328         return 0;
6329 }
6330
6331 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6332                                           enum amd_clockgating_state state)
6333 {
6334
6335         uint32_t msg_id, pp_state = 0;
6336         uint32_t pp_support_state = 0;
6337         void *pp_handle = adev->powerplay.pp_handle;
6338
6339         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6340                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6341                         pp_support_state = PP_STATE_SUPPORT_LS;
6342                         pp_state = PP_STATE_LS;
6343                 }
6344                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6345                         pp_support_state |= PP_STATE_SUPPORT_CG;
6346                         pp_state |= PP_STATE_CG;
6347                 }
6348                 if (state == AMD_CG_STATE_UNGATE)
6349                         pp_state = 0;
6350
6351                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6352                                 PP_BLOCK_GFX_CG,
6353                                 pp_support_state,
6354                                 pp_state);
6355                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6356         }
6357
6358         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6359                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6360                         pp_support_state = PP_STATE_SUPPORT_LS;
6361                         pp_state = PP_STATE_LS;
6362                 }
6363                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6364                         pp_support_state |= PP_STATE_SUPPORT_CG;
6365                         pp_state |= PP_STATE_CG;
6366                 }
6367                 if (state == AMD_CG_STATE_UNGATE)
6368                         pp_state = 0;
6369
6370                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6371                                 PP_BLOCK_GFX_3D,
6372                                 pp_support_state,
6373                                 pp_state);
6374                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6375         }
6376
6377         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6378                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6379                         pp_support_state = PP_STATE_SUPPORT_LS;
6380                         pp_state = PP_STATE_LS;
6381                 }
6382
6383                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6384                         pp_support_state |= PP_STATE_SUPPORT_CG;
6385                         pp_state |= PP_STATE_CG;
6386                 }
6387
6388                 if (state == AMD_CG_STATE_UNGATE)
6389                         pp_state = 0;
6390
6391                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6392                                 PP_BLOCK_GFX_MG,
6393                                 pp_support_state,
6394                                 pp_state);
6395                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6396         }
6397
6398         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6399                 pp_support_state = PP_STATE_SUPPORT_LS;
6400
6401                 if (state == AMD_CG_STATE_UNGATE)
6402                         pp_state = 0;
6403                 else
6404                         pp_state = PP_STATE_LS;
6405
6406                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6407                                 PP_BLOCK_GFX_RLC,
6408                                 pp_support_state,
6409                                 pp_state);
6410                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6411         }
6412
6413         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6414                 pp_support_state = PP_STATE_SUPPORT_LS;
6415
6416                 if (state == AMD_CG_STATE_UNGATE)
6417                         pp_state = 0;
6418                 else
6419                         pp_state = PP_STATE_LS;
6420                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6421                         PP_BLOCK_GFX_CP,
6422                         pp_support_state,
6423                         pp_state);
6424                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6425         }
6426
6427         return 0;
6428 }
6429
6430 static int gfx_v8_0_set_clockgating_state(void *handle,
6431                                           enum amd_clockgating_state state)
6432 {
6433         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6434
6435         if (amdgpu_sriov_vf(adev))
6436                 return 0;
6437
6438         switch (adev->asic_type) {
6439         case CHIP_FIJI:
6440         case CHIP_CARRIZO:
6441         case CHIP_STONEY:
6442                 gfx_v8_0_update_gfx_clock_gating(adev,
6443                                                  state == AMD_CG_STATE_GATE);
6444                 break;
6445         case CHIP_TONGA:
6446                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6447                 break;
6448         case CHIP_POLARIS10:
6449         case CHIP_POLARIS11:
6450         case CHIP_POLARIS12:
6451                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6452                 break;
6453         default:
6454                 break;
6455         }
6456         return 0;
6457 }
6458
6459 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6460 {
6461         return ring->adev->wb.wb[ring->rptr_offs];
6462 }
6463
6464 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6465 {
6466         struct amdgpu_device *adev = ring->adev;
6467
6468         if (ring->use_doorbell)
6469                 /* XXX check if swapping is necessary on BE */
6470                 return ring->adev->wb.wb[ring->wptr_offs];
6471         else
6472                 return RREG32(mmCP_RB0_WPTR);
6473 }
6474
6475 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6476 {
6477         struct amdgpu_device *adev = ring->adev;
6478
6479         if (ring->use_doorbell) {
6480                 /* XXX check if swapping is necessary on BE */
6481                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6482                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6483         } else {
6484                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6485                 (void)RREG32(mmCP_RB0_WPTR);
6486         }
6487 }
6488
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 * @ring: ring to emit on
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the per-client bit in ref_and_mask is set.
 * Compute/KIQ rings derive the bit from their ME and pipe; the gfx
 * ring uses the CP0 bit and runs the packet on the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* unknown ME: nothing to emit */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6521
/*
 * Emit a VS_PARTIAL_FLUSH event followed by a VGT_FLUSH event on @ring.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6532
6533
/*
 * Invalidate the HDP cache by writing 1 to HDP_DEBUG0 through a
 * WRITE_DATA packet (ME engine, register destination, write confirm).
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6545
/*
 * gfx_v8_0_ring_emit_ib_gfx - schedule an indirect buffer on the gfx ring
 * @ring: ring to emit on
 * @ib: indirect buffer to execute
 * @vm_id: VM id, packed into bits 31:24 of the control word
 * @ctx_switch: unused here
 *
 * CE IBs are emitted with INDIRECT_BUFFER_CONST, DE IBs with
 * INDIRECT_BUFFER.  Under SR-IOV, preemptible IBs additionally get
 * INDIRECT_BUFFER_PRE_ENB set.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT)
		control |= INDIRECT_BUFFER_PRE_ENB(1);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* byte swap on BE hosts */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6571
/*
 * gfx_v8_0_ring_emit_ib_compute - schedule an indirect buffer on a compute ring
 * @ring: ring to emit on
 * @ib: indirect buffer to execute
 * @vm_id: VM id, packed into bits 31:24 of the control word
 * @ctx_switch: unused here
 *
 * Emits an INDIRECT_BUFFER packet with INDIRECT_BUFFER_VALID set.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |	/* byte swap on BE hosts */
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6587
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 * @ring: ring to emit on
 * @addr: GPU address the sequence number is written to
 * @seq: sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_64BIT for a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT to raise an interrupt
 *
 * Emits EVENT_WRITE_EOP with TC/TCL1 cache flush-and-invalidate; the
 * CP writes @seq to @addr when the event retires.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6608
/*
 * gfx_v8_0_ring_emit_pipeline_sync - wait for previously emitted fences
 * @ring: ring to emit on
 *
 * Emits a WAIT_REG_MEM (memory space, equality) on the ring's fence
 * address for the last synced sequence number.  Gfx rings wait on the
 * PFP engine, compute rings on ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6625
/*
 * gfx_v8_0_ring_emit_vm_flush - flush the TLB for a VM context
 * @ring: ring to emit on
 * @vm_id: VM context id (0-15)
 * @pd_addr: page directory base address
 *
 * Writes the context's page table base register (contexts 0-7 and
 * 8-15 live in separate register ranges), requests an invalidate via
 * VM_INVALIDATE_REQUEST, emits a wait on that register, and on gfx
 * rings finally syncs PFP to ME to avoid stale PFP reads.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6672
6673 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6674 {
6675         return ring->adev->wb.wb[ring->wptr_offs];
6676 }
6677
6678 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6679 {
6680         struct amdgpu_device *adev = ring->adev;
6681
6682         /* XXX check if swapping is necessary on BE */
6683         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6684         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6685 }
6686
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 * @ring: ring to emit on
 * @addr: GPU address the sequence number is written to
 * @seq: sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_64BIT for a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT to raise an interrupt
 *
 * Emits RELEASE_MEM with TC/TCL1 cache flush-and-invalidate; the CP
 * writes @seq to @addr when the event retires.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6707
/*
 * gfx_v8_0_ring_emit_fence_kiq - emit a fence on the KIQ ring
 * @ring: KIQ ring
 * @addr: GPU address the sequence number is written to
 * @seq: sequence number (only the lower 32 bits are written)
 * @flags: AMDGPU_FENCE_FLAG_INT to raise an interrupt;
 *         AMDGPU_FENCE_FLAG_64BIT is not supported here
 *
 * Writes the sequence number via WRITE_DATA (memory destination) and,
 * if requested, pokes CPC_INT_STATUS to trigger the interrupt.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6732
/* Emit a SWITCH_BUFFER packet on @ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6738
/*
 * gfx_v8_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet
 * @ring: gfx ring
 * @flags: AMDGPU_HAVE_CTX_SWITCH and AMDGPU_PREAMBLE_IB_PRESENT* bits
 *
 * Builds the dw2 load-control word according to @flags and emits
 * CONTEXT_CONTROL.  Under SR-IOV the packet is bracketed by CE/DE
 * metadata init writes against the CSA address.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta_init(ring,
			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_de_meta_init(ring,
			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
}
6776
/*
 * Emit a COND_EXEC packet whose size DW is a placeholder (0x55aa55aa)
 * and return that DW's ring offset so
 * gfx_v8_0_ring_emit_patch_cond_exec() can patch in the real count
 * later.  The CP skips the following DWs while *cond_exe_gpu_addr == 0.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6789
/*
 * Patch the placeholder written by gfx_v8_0_ring_emit_init_cond_exec()
 * at ring offset @offset with the number of DWs emitted since it; the
 * else branch handles the write pointer having wrapped around the
 * ring buffer in the meantime.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6803
6804
/*
 * Emit a COPY_DATA packet that copies register @reg into the
 * writeback slot reserved for virtualized register reads
 * (adev->virt.reg_val_offs), with write confirmation.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6820
/*
 * Emit a WRITE_DATA packet that writes @val to register @reg
 * (no address increment).
 */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				  uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
6830
/*
 * Enable or disable the gfx ring EOP timestamp interrupt
 * (CP_INT_CNTL_RING0.TIME_STAMP_INT_ENABLE).
 */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6837
6838 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6839                                                      int me, int pipe,
6840                                                      enum amdgpu_interrupt_state state)
6841 {
6842         /*
6843          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6844          * handles the setting of interrupts for this specific pipe. All other
6845          * pipes' interrupts are set by amdkfd.
6846          */
6847
6848         if (me == 1) {
6849                 switch (pipe) {
6850                 case 0:
6851                         break;
6852                 default:
6853                         DRM_DEBUG("invalid pipe %d\n", pipe);
6854                         return;
6855                 }
6856         } else {
6857                 DRM_DEBUG("invalid me %d\n", me);
6858                 return;
6859         }
6860
6861         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6862                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6863 }
6864
/*
 * Enable or disable the privileged register fault interrupt
 * (CP_INT_CNTL_RING0.PRIV_REG_INT_ENABLE).  Always returns 0.
 */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6875
/*
 * Enable or disable the privileged instruction fault interrupt
 * (CP_INT_CNTL_RING0.PRIV_INSTR_INT_ENABLE).  Always returns 0.
 */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6886
/*
 * Route an EOP interrupt state change to the right engine: the gfx
 * ring or one of the eight MEC compute pipes, selected by @type.
 * Unknown types are ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6925
6926 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6927                             struct amdgpu_irq_src *source,
6928                             struct amdgpu_iv_entry *entry)
6929 {
6930         int i;
6931         u8 me_id, pipe_id, queue_id;
6932         struct amdgpu_ring *ring;
6933
6934         DRM_DEBUG("IH: CP EOP\n");
6935         me_id = (entry->ring_id & 0x0c) >> 2;
6936         pipe_id = (entry->ring_id & 0x03) >> 0;
6937         queue_id = (entry->ring_id & 0x70) >> 4;
6938
6939         switch (me_id) {
6940         case 0:
6941                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6942                 break;
6943         case 1:
6944         case 2:
6945                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6946                         ring = &adev->gfx.compute_ring[i];
6947                         /* Per-queue interrupt is supported for MEC starting from VI.
6948                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6949                           */
6950                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6951                                 amdgpu_fence_process(ring);
6952                 }
6953                 break;
6954         }
6955         return 0;
6956 }
6957
/*
 * Handler for a privileged-register-access fault from the CP: log it and
 * schedule an asynchronous GPU reset.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6966
/*
 * Handler for a privileged-instruction fault from the CP: log it and
 * schedule an asynchronous GPU reset.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6975
6976 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6977                                             struct amdgpu_irq_src *src,
6978                                             unsigned int type,
6979                                             enum amdgpu_interrupt_state state)
6980 {
6981         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6982
6983         BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
6984
6985         switch (type) {
6986         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6987                 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6988                              state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6989                 if (ring->me == 1)
6990                         WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6991                                      ring->pipe,
6992                                      GENERIC2_INT_ENABLE,
6993                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6994                 else
6995                         WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6996                                      ring->pipe,
6997                                      GENERIC2_INT_ENABLE,
6998                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6999                 break;
7000         default:
7001                 BUG(); /* kiq only support GENERIC2_INT now */
7002                 break;
7003         }
7004         return 0;
7005 }
7006
7007 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7008                             struct amdgpu_irq_src *source,
7009                             struct amdgpu_iv_entry *entry)
7010 {
7011         u8 me_id, pipe_id, queue_id;
7012         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7013
7014         BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
7015
7016         me_id = (entry->ring_id & 0x0c) >> 2;
7017         pipe_id = (entry->ring_id & 0x03) >> 0;
7018         queue_id = (entry->ring_id & 0x70) >> 4;
7019         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7020                    me_id, pipe_id, queue_id);
7021
7022         amdgpu_fence_process(ring);
7023         return 0;
7024 }
7025
/* IP-block entry points for the GFX 8.x block (init/teardown, power, reset). */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
7046
/* Ring callbacks for the GFX (graphics) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
		       prior to this double SWITCH_BUFFER */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
7090
/* Ring callbacks for the MEC compute rings (KCQs). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
7119
/* Ring callbacks for the kernel interface queue (KIQ). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	/* KIQ can read/write registers on behalf of the driver */
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7145
7146 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7147 {
7148         int i;
7149
7150         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7151
7152         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7153                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7154
7155         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7156                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7157 }
7158
/* EOP interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
7163
/* Privileged-register fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
7168
/* Privileged-instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
7173
/* KIQ GENERIC2 interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
7178
/* Register the GFX 8 interrupt sources and their type counts. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* one EOP type per CP ring (gfx + all MEC pipes) */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
7193
/*
 * Hook up the RLC callbacks.
 * NOTE(review): the iceland table is used for all GFX 8 variants here —
 * presumably the RLC interface is identical across them; confirm.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7198
/*
 * Initialize the GDS (global data share) partition sizes from the
 * hardware-reported total GDS memory size.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		/* 64 KB GDS parts */
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		/* larger GDS pools get bigger partitions */
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
7226
7227 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7228                                                  u32 bitmap)
7229 {
7230         u32 data;
7231
7232         if (!bitmap)
7233                 return;
7234
7235         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7236         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7237
7238         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7239 }
7240
7241 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7242 {
7243         u32 data, mask;
7244
7245         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7246                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7247
7248         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
7249
7250         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7251 }
7252
/*
 * Populate adev->gfx.cu_info: the per-SE/SH active-CU bitmaps, the total
 * active CU count, and the "always on" (ao) CU mask.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	/* room for up to 4 shader engines x 2 shader arrays */
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	/* user-requested per-SE/SH CU disable masks */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* target this specific SE/SH via GRBM index */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* disable_masks only covers 4 SEs x 2 SHs */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/*
			 * Count active CUs; the first two active CUs per
			 * SH are marked "always on".
			 */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore GRBM broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
7295
/* IP block descriptor for GFX v8.0 hardware. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7304
/* IP block descriptor for GFX v8.1 hardware (shares the v8.0 callbacks). */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7313
7314 static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7315 {
7316         uint64_t ce_payload_addr;
7317         int cnt_ce;
7318         static union {
7319                 struct vi_ce_ib_state regular;
7320                 struct vi_ce_ib_state_chained_ib chained;
7321         } ce_payload = {};
7322
7323         if (ring->adev->virt.chained_ib_support) {
7324                 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7325                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7326         } else {
7327                 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload);
7328                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7329         }
7330
7331         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7332         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7333                                 WRITE_DATA_DST_SEL(8) |
7334                                 WR_CONFIRM) |
7335                                 WRITE_DATA_CACHE_POLICY(0));
7336         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7337         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7338         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7339 }
7340
7341 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7342 {
7343         uint64_t de_payload_addr, gds_addr;
7344         int cnt_de;
7345         static union {
7346                 struct vi_de_ib_state regular;
7347                 struct vi_de_ib_state_chained_ib chained;
7348         } de_payload = {};
7349
7350         gds_addr = csa_addr + 4096;
7351         if (ring->adev->virt.chained_ib_support) {
7352                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7353                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7354                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7355                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7356         } else {
7357                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7358                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7359                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7360                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7361         }
7362
7363         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7364         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7365                                 WRITE_DATA_DST_SEL(8) |
7366                                 WR_CONFIRM) |
7367                                 WRITE_DATA_CACHE_POLICY(0));
7368         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7369         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7370         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7371 }
7372
7373 /* create MQD for each compute queue */
7374 static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
7375 {
7376         struct amdgpu_ring *ring = NULL;
7377         int r, i;
7378
7379         /* create MQD for KIQ */
7380         ring = &adev->gfx.kiq.ring;
7381         if (!ring->mqd_obj) {
7382                 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
7383                                             AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7384                                             &ring->mqd_gpu_addr, &ring->mqd_ptr);
7385                 if (r) {
7386                         dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
7387                         return r;
7388                 }
7389
7390                 /* prepare MQD backup */
7391                 adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7392                 if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
7393                                 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
7394         }
7395
7396         /* create MQD for each KCQ */
7397         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7398                 ring = &adev->gfx.compute_ring[i];
7399                 if (!ring->mqd_obj) {
7400                         r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
7401                                                     AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7402                                                     &ring->mqd_gpu_addr, &ring->mqd_ptr);
7403                         if (r) {
7404                                 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
7405                                 return r;
7406                         }
7407
7408                         /* prepare MQD backup */
7409                         adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7410                         if (!adev->gfx.mec.mqd_backup[i])
7411                                 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
7412                 }
7413         }
7414
7415         return 0;
7416 }
7417
7418 static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
7419 {
7420         struct amdgpu_ring *ring = NULL;
7421         int i;
7422
7423         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7424                 ring = &adev->gfx.compute_ring[i];
7425                 kfree(adev->gfx.mec.mqd_backup[i]);
7426                 amdgpu_bo_free_kernel(&ring->mqd_obj,
7427                                       &ring->mqd_gpu_addr,
7428                                       &ring->mqd_ptr);
7429         }
7430
7431         ring = &adev->gfx.kiq.ring;
7432         kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
7433         amdgpu_bo_free_kernel(&ring->mqd_obj,
7434                               &ring->mqd_gpu_addr,
7435                               &ring->mqd_ptr);
7436 }
This page took 0.473634 seconds and 4 git commands to generate.