Git Repo - linux.git/blob - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
Merge tag 'selinux-pr-20180516' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "vi.h"
29 #include "vi_structs.h"
30 #include "vid.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
35
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
38
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
41
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
/*
 * Number of GFX rings and the MEC HPD size used by this file.
 * NOTE(review): the unit of GFX8_MEC_HPD_SIZE is not visible here --
 * presumably bytes; confirm at the allocation site.
 */
54 #define GFX8_NUM_GFX_RINGS     1
55 #define GFX8_MEC_HPD_SIZE 2048
56
/*
 * Golden GB_ADDR_CONFIG values, one per named ASIC.
 * Topaz and Carrizo share the same value (0x22010001).
 */
57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
61
/*
 * Field-placement helpers for the GB_TILE_MODE0 and GB_MACROTILE_MODE0
 * register layouts.  Each macro shifts x left by the field's __SHIFT
 * constant; no mask is applied, so x must already fit in the field.
 */
62 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
63 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
64 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
65 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
66 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
67 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
68 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
69 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
70 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
71
/*
 * Single-bit masks for RLC_CGTT_MGCG_OVERRIDE, occupying bits 0 through 5
 * (CPF, RLC, MGCG, CGCG, CGLS, GRBM in ascending bit order).
 */
72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
77 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
78
79 /* BPM SERDES command codes: SET = 1, CLE = 0 (CLE presumably means "clear") */
80 #define SET_BPM_SERDES_CMD    1
81 #define CLE_BPM_SERDES_CMD    0
82
83 /* BPM register addresses: consecutive indices starting at 0; BPM_REG_FGCG_MAX (5) is the entry count */
84 enum {
85         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
86         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
87         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
88         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
89         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
90         BPM_REG_FGCG_MAX
91 };
92
/*
 * NOTE(review): presumably the length of the "direct" register list in the
 * RLC register-list format; the consumer and unit are not visible in this
 * part of the file -- confirm at the use site.
 */
93 #define RLC_FormatDirectRegListLength        14
94
/*
 * Firmware images declared by this module, grouped by ASIC name.
 * Each group lists ce, pfp, me, mec and rlc images.  Carrizo, Tonga, Fiji
 * and the Polaris parts additionally list mec2; Stoney and Topaz do not.
 * The Polaris 10/11/12 groups also list a "_2" variant of every image
 * except rlc.
 */
95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
101
102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
107
108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
114
115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
120
121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
127
128 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
133 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
134 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
135 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
139
140 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
145 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
147 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
151
152 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
153 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
155 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
163
/*
 * Per-VMID GDS register lookup table: 16 rows, index i holding the register
 * identifiers for VMID i (0..15).  Each row lists, in order, the
 * GDS_VMIDn_BASE, GDS_VMIDn_SIZE, GDS_GWS_VMIDn and GDS_OA_VMIDn registers.
 * NOTE(review): the field names of struct amdgpu_gds_reg_offset are declared
 * elsewhere; the positional order here must match that declaration.
 */
164 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
165 {
166         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
167         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
168         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
169         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
170         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
171         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
172         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
173         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
174         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
175         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
176         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
177         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
178         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
179         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
180         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
181         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
182 };
183
/*
 * Tonga golden register settings.  Flat u32 array laid out three words per
 * row: an mm* register identifier followed by two 32-bit constants.
 * NOTE(review): the two constants are presumably the AND mask and the OR
 * value applied by the register-sequence programming helper -- confirm
 * against the consumer, which is not in this part of the file.
 */
184 static const u32 golden_settings_tonga_a11[] =
185 {
186         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
187         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
188         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
189         mmGB_GPU_ID, 0x0000000f, 0x00000000,
190         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
191         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
192         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
193         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
194         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
195         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
196         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
197         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
198         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
199         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
200         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
201         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
202 };
203
/*
 * Tonga common golden settings: three u32 words per row (mm* register
 * identifier, then two constants -- presumably mask and value; confirm
 * against the consumer).  Every row uses 0xffffffff as the second word.
 * The GB_ADDR_CONFIG value equals TONGA_GB_ADDR_CONFIG_GOLDEN (0x22011003).
 */
204 static const u32 tonga_golden_common_all[] =
205 {
206         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
207         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
208         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
209         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
210         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
211         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
212         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
213         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
214 };
215
/*
 * Tonga MGCG/CGCG initialisation table: three u32 words per row (mm*
 * register identifier, then two constants -- presumably mask and value;
 * confirm against the consumer).
 *
 * Layout: the RLC override register, a GRBM_GFX_INDEX write, the CGTT/CGTT-
 * style clock-control registers, a second GRBM_GFX_INDEX write, then five
 * CGTS control registers for each of CU0..CU7, and finally four global rows.
 * CU0 and CU4 use the *_TA_SQC_CTRL_REG name where the other CUs use
 * *_TA_CTRL_REG.
 */
216 static const u32 tonga_mgcg_cgcg_init[] =
217 {
218         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
219         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
220         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
221         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
222         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
223         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
224         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
225         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
226         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
227         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
228         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
229         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
230         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
231         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
232         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
233         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
234         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
236         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
237         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
238         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
239         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
240         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
241         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
242         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
243         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
244         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
245         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
246         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
247         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
248         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
249         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
250         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
251         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
252         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
253         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
254         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
255         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
256         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
257         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
258         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
259         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
260         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
261         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
262         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
263         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
264         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
265         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
266         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
267         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
268         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
269         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
270         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
271         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
272         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
273         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
274         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
275         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
276         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
277         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
278         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
279         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
280         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
281         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
282         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
283         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
284         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
285         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
286         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
287         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
288         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
289         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
290         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
291         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
292         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
293 };
294
/*
 * Polaris11 golden register settings.  Flat u32 array, three words per row:
 * an mm* register identifier followed by two 32-bit constants (presumably
 * AND mask and OR value -- confirm against the consumer, which is not in
 * this part of the file).
 */
295 static const u32 golden_settings_polaris11_a11[] =
296 {
297         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
298         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
299         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
300         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
301         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
302         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
303         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
304         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
305         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
306         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
307         mmSQ_CONFIG, 0x07f80000, 0x01180000,
308         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
309         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
310         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
311         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
312         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
313         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
314 };
315
/*
 * Polaris11 common golden settings: three u32 words per row (mm* register
 * identifier, then two constants -- presumably mask and value; confirm
 * against the consumer).  Unlike the other *_golden_common_all tables in
 * this file it has no PA_SC_RASTER_CONFIG rows.  The GB_ADDR_CONFIG value
 * equals POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002).
 */
316 static const u32 polaris11_golden_common_all[] =
317 {
318         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
319         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
320         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
321         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
322         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
323         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
324 };
325
/*
 * Polaris10 golden register settings.  Flat u32 array, three words per row:
 * an mm* register identifier followed by two 32-bit constants (presumably
 * AND mask and OR value -- confirm against the consumer).
 * NOTE(review): this table has a TCP_CHAN_STEER_HI row but, unlike the
 * Polaris11 table, no TCP_CHAN_STEER_LO row -- confirm that is intended.
 */
326 static const u32 golden_settings_polaris10_a11[] =
327 {
328         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
329         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
330         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
331         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
332         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
333         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
334         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
335         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
336         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
337         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
338         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
339         mmSQ_CONFIG, 0x07f80000, 0x07180000,
340         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
341         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
342         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
343         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
344         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
345 };
346
/*
 * Polaris10 common golden settings: three u32 words per row (mm* register
 * identifier, then two constants -- presumably mask and value; confirm
 * against the consumer).  The GB_ADDR_CONFIG value is 0x22011003, the same
 * number as TONGA_GB_ADDR_CONFIG_GOLDEN.
 */
347 static const u32 polaris10_golden_common_all[] =
348 {
349         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
350         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
351         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
352         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
353         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
354         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
355         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
356         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
357 };
358
/*
 * Fiji common golden settings: three u32 words per row (mm* register
 * identifier, then two constants -- presumably mask and value; confirm
 * against the consumer).  GRBM_GFX_INDEX is written twice with the same
 * value, the second time immediately before the SPI_CONFIG_CNTL_1 row.
 */
359 static const u32 fiji_golden_common_all[] =
360 {
361         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
362         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
363         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
364         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
365         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
366         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
367         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
368         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
369         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
370         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
371 };
372
/*
 * Fiji golden register settings.  Flat u32 array, three words per row: an
 * mm* register identifier followed by two 32-bit constants (presumably AND
 * mask and OR value -- confirm against the consumer, which is not in this
 * part of the file).
 */
373 static const u32 golden_settings_fiji_a10[] =
374 {
375         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
376         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
377         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
378         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
379         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
380         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
381         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
382         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
383         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
384         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
385         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
386 };
387
/*
 * Fiji MGCG/CGCG initialisation table: three u32 words per row (mm*
 * register identifier, then two constants -- presumably mask and value;
 * confirm against the consumer).  Unlike the Tonga, Iceland and Carrizo
 * tables in this file, it contains no per-CU CGTS_CUn_* rows.
 */
388 static const u32 fiji_mgcg_cgcg_init[] =
389 {
390         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
391         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
392         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
393         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
394         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
395         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
396         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
397         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
398         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
399         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
400         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
401         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
402         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
403         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
404         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
405         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
406         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
407         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
408         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
409         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
410         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
411         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
412         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
413         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
414         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
415         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
416         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
417         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
418         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
419         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
420         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
421         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
422         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
423         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
424         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
425 };
426
/*
 * Iceland golden register settings.  Flat u32 array, three words per row:
 * an mm* register identifier followed by two 32-bit constants (presumably
 * AND mask and OR value -- confirm against the consumer, which is not in
 * this part of the file).
 */
427 static const u32 golden_settings_iceland_a11[] =
428 {
429         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
430         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
431         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
432         mmGB_GPU_ID, 0x0000000f, 0x00000000,
433         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
434         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
435         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
436         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
437         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
438         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
439         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
440         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
441         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
442         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
443         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
444         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
445 };
446
/*
 * Iceland common golden settings: three u32 words per row (mm* register
 * identifier, then two constants -- presumably mask and value; confirm
 * against the consumer).  The GB_ADDR_CONFIG value is 0x22010001, the same
 * number as TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 */
447 static const u32 iceland_golden_common_all[] =
448 {
449         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
450         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
451         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
452         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
453         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
454         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
455         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
456         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
457 };
458
/*
 * Iceland MGCG/CGCG initialisation table: three u32 words per row (mm*
 * register identifier, then two constants -- presumably mask and value;
 * confirm against the consumer).
 *
 * Differences from the Tonga/Carrizo tables visible in this file: per-CU
 * CGTS rows cover CU0..CU5 only, the CP/CPC/CPF and TCI clock-control
 * values differ, the CU0/CU4 TA_SQC values are 0x0f840f87, and there is no
 * trailing CP_MEM_SLP_CNTL row.
 */
459 static const u32 iceland_mgcg_cgcg_init[] =
460 {
461         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
462         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
463         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
464         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
465         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
466         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
467         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
468         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
469         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
470         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
471         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
472         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
473         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
474         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
475         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
476         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
477         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
478         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
479         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
480         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
481         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
482         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
483         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
484         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
485         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
486         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
487         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
488         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
489         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
490         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
491         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
492         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
493         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
494         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
495         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
496         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
497         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
498         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
499         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
500         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
501         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
502         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
503         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
504         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
505         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
506         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
507         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
508         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
509         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
510         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
511         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
512         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
513         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
514         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
515         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
516         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
517         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
518         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
519         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
520         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
521         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
522         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
523         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
524         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
525 };
526
/*
 * Carrizo ("cz") golden register settings.  Flat u32 array, three words per
 * row: an mm* register identifier followed by two 32-bit constants
 * (presumably AND mask and OR value -- confirm against the consumer).
 * NOTE(review): the TCP_ADDR_CONFIG row has second word 0x0000000f but third
 * word 0x000000f3, i.e. the value has bits set outside the presumed mask;
 * whether those bits take effect depends on how the consumer applies the
 * pair -- confirm.
 */
527 static const u32 cz_golden_settings_a11[] =
528 {
529         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
530         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
531         mmGB_GPU_ID, 0x0000000f, 0x00000000,
532         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
533         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
534         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
535         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
536         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
537         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
538         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
539         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
540         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
541 };
542
/*
 * Carrizo ("cz") common golden settings: three u32 words per row (mm*
 * register identifier, then two constants -- presumably mask and value;
 * confirm against the consumer).  The GB_ADDR_CONFIG value equals
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
543 static const u32 cz_golden_common_all[] =
544 {
545         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
546         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
547         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
548         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
549         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
550         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
551         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
552         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
553 };
554
/*
 * Carrizo ("cz") MGCG/CGCG initialisation table: three u32 words per row
 * (mm* register identifier, then two constants -- presumably mask and
 * value; confirm against the consumer).
 *
 * Same row layout as the Tonga table in this file (per-CU CGTS rows for
 * CU0..CU7); the values differ in CGTT_CPF_CLK_CTRL (0x00000100) and in the
 * RLC_CGCG_CGLS_CTRL row (0x0020003f).
 */
555 static const u32 cz_mgcg_cgcg_init[] =
556 {
557         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
558         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
559         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
560         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
561         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
562         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
563         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
564         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
565         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
566         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
567         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
568         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
569         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
570         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
571         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
572         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
573         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
574         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
575         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
576         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
577         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
578         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
579         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
580         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
581         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
582         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
583         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
584         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
585         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
586         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
587         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
588         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
589         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
590         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
591         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
592         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
593         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
594         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
595         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
596         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
597         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
598         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
599         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
600         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
601         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
602         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
603         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
604         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
605         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
606         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
607         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
608         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
609         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
610         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
611         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
612         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
613         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
614         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
615         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
616         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
617         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
618         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
619         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
620         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
621         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
622         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
623         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
624         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
625         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
626         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
627         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
628         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
629         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
630         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
631         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
632 };
633
634 static const u32 stoney_golden_settings_a11[] =
635 {
636         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
637         mmGB_GPU_ID, 0x0000000f, 0x00000000,
638         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
639         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
640         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
641         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
642         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
643         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
644         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
645         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
646 };
647
648 static const u32 stoney_golden_common_all[] =
649 {
650         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
651         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
652         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
653         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
654         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
655         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
656         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
657         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
658 };
659
660 static const u32 stoney_mgcg_cgcg_init[] =
661 {
662         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
663         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
664         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
665         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
666         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
667 };
668
669 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
670 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
671 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
672 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
673 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
674 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
675 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
676 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
677
678 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
679 {
680         switch (adev->asic_type) {
681         case CHIP_TOPAZ:
682                 amdgpu_device_program_register_sequence(adev,
683                                                         iceland_mgcg_cgcg_init,
684                                                         ARRAY_SIZE(iceland_mgcg_cgcg_init));
685                 amdgpu_device_program_register_sequence(adev,
686                                                         golden_settings_iceland_a11,
687                                                         ARRAY_SIZE(golden_settings_iceland_a11));
688                 amdgpu_device_program_register_sequence(adev,
689                                                         iceland_golden_common_all,
690                                                         ARRAY_SIZE(iceland_golden_common_all));
691                 break;
692         case CHIP_FIJI:
693                 amdgpu_device_program_register_sequence(adev,
694                                                         fiji_mgcg_cgcg_init,
695                                                         ARRAY_SIZE(fiji_mgcg_cgcg_init));
696                 amdgpu_device_program_register_sequence(adev,
697                                                         golden_settings_fiji_a10,
698                                                         ARRAY_SIZE(golden_settings_fiji_a10));
699                 amdgpu_device_program_register_sequence(adev,
700                                                         fiji_golden_common_all,
701                                                         ARRAY_SIZE(fiji_golden_common_all));
702                 break;
703
704         case CHIP_TONGA:
705                 amdgpu_device_program_register_sequence(adev,
706                                                         tonga_mgcg_cgcg_init,
707                                                         ARRAY_SIZE(tonga_mgcg_cgcg_init));
708                 amdgpu_device_program_register_sequence(adev,
709                                                         golden_settings_tonga_a11,
710                                                         ARRAY_SIZE(golden_settings_tonga_a11));
711                 amdgpu_device_program_register_sequence(adev,
712                                                         tonga_golden_common_all,
713                                                         ARRAY_SIZE(tonga_golden_common_all));
714                 break;
715         case CHIP_POLARIS11:
716         case CHIP_POLARIS12:
717                 amdgpu_device_program_register_sequence(adev,
718                                                         golden_settings_polaris11_a11,
719                                                         ARRAY_SIZE(golden_settings_polaris11_a11));
720                 amdgpu_device_program_register_sequence(adev,
721                                                         polaris11_golden_common_all,
722                                                         ARRAY_SIZE(polaris11_golden_common_all));
723                 break;
724         case CHIP_POLARIS10:
725                 amdgpu_device_program_register_sequence(adev,
726                                                         golden_settings_polaris10_a11,
727                                                         ARRAY_SIZE(golden_settings_polaris10_a11));
728                 amdgpu_device_program_register_sequence(adev,
729                                                         polaris10_golden_common_all,
730                                                         ARRAY_SIZE(polaris10_golden_common_all));
731                 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
732                 if (adev->pdev->revision == 0xc7 &&
733                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
734                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
735                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
736                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
737                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
738                 }
739                 break;
740         case CHIP_CARRIZO:
741                 amdgpu_device_program_register_sequence(adev,
742                                                         cz_mgcg_cgcg_init,
743                                                         ARRAY_SIZE(cz_mgcg_cgcg_init));
744                 amdgpu_device_program_register_sequence(adev,
745                                                         cz_golden_settings_a11,
746                                                         ARRAY_SIZE(cz_golden_settings_a11));
747                 amdgpu_device_program_register_sequence(adev,
748                                                         cz_golden_common_all,
749                                                         ARRAY_SIZE(cz_golden_common_all));
750                 break;
751         case CHIP_STONEY:
752                 amdgpu_device_program_register_sequence(adev,
753                                                         stoney_mgcg_cgcg_init,
754                                                         ARRAY_SIZE(stoney_mgcg_cgcg_init));
755                 amdgpu_device_program_register_sequence(adev,
756                                                         stoney_golden_settings_a11,
757                                                         ARRAY_SIZE(stoney_golden_settings_a11));
758                 amdgpu_device_program_register_sequence(adev,
759                                                         stoney_golden_common_all,
760                                                         ARRAY_SIZE(stoney_golden_common_all));
761                 break;
762         default:
763                 break;
764         }
765 }
766
767 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
768 {
769         adev->gfx.scratch.num_reg = 8;
770         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
771         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
772 }
773
774 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
775 {
776         struct amdgpu_device *adev = ring->adev;
777         uint32_t scratch;
778         uint32_t tmp = 0;
779         unsigned i;
780         int r;
781
782         r = amdgpu_gfx_scratch_get(adev, &scratch);
783         if (r) {
784                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
785                 return r;
786         }
787         WREG32(scratch, 0xCAFEDEAD);
788         r = amdgpu_ring_alloc(ring, 3);
789         if (r) {
790                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
791                           ring->idx, r);
792                 amdgpu_gfx_scratch_free(adev, scratch);
793                 return r;
794         }
795         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
796         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
797         amdgpu_ring_write(ring, 0xDEADBEEF);
798         amdgpu_ring_commit(ring);
799
800         for (i = 0; i < adev->usec_timeout; i++) {
801                 tmp = RREG32(scratch);
802                 if (tmp == 0xDEADBEEF)
803                         break;
804                 DRM_UDELAY(1);
805         }
806         if (i < adev->usec_timeout) {
807                 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
808                          ring->idx, i);
809         } else {
810                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
811                           ring->idx, scratch, tmp);
812                 r = -EINVAL;
813         }
814         amdgpu_gfx_scratch_free(adev, scratch);
815         return r;
816 }
817
818 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
819 {
820         struct amdgpu_device *adev = ring->adev;
821         struct amdgpu_ib ib;
822         struct dma_fence *f = NULL;
823         uint32_t scratch;
824         uint32_t tmp = 0;
825         long r;
826
827         r = amdgpu_gfx_scratch_get(adev, &scratch);
828         if (r) {
829                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
830                 return r;
831         }
832         WREG32(scratch, 0xCAFEDEAD);
833         memset(&ib, 0, sizeof(ib));
834         r = amdgpu_ib_get(adev, NULL, 256, &ib);
835         if (r) {
836                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
837                 goto err1;
838         }
839         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
840         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
841         ib.ptr[2] = 0xDEADBEEF;
842         ib.length_dw = 3;
843
844         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
845         if (r)
846                 goto err2;
847
848         r = dma_fence_wait_timeout(f, false, timeout);
849         if (r == 0) {
850                 DRM_ERROR("amdgpu: IB test timed out.\n");
851                 r = -ETIMEDOUT;
852                 goto err2;
853         } else if (r < 0) {
854                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
855                 goto err2;
856         }
857         tmp = RREG32(scratch);
858         if (tmp == 0xDEADBEEF) {
859                 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
860                 r = 0;
861         } else {
862                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
863                           scratch, tmp);
864                 r = -EINVAL;
865         }
866 err2:
867         amdgpu_ib_free(adev, &ib, NULL);
868         dma_fence_put(f);
869 err1:
870         amdgpu_gfx_scratch_free(adev, scratch);
871         return r;
872 }
873
874
875 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
876 {
877         release_firmware(adev->gfx.pfp_fw);
878         adev->gfx.pfp_fw = NULL;
879         release_firmware(adev->gfx.me_fw);
880         adev->gfx.me_fw = NULL;
881         release_firmware(adev->gfx.ce_fw);
882         adev->gfx.ce_fw = NULL;
883         release_firmware(adev->gfx.rlc_fw);
884         adev->gfx.rlc_fw = NULL;
885         release_firmware(adev->gfx.mec_fw);
886         adev->gfx.mec_fw = NULL;
887         if ((adev->asic_type != CHIP_STONEY) &&
888             (adev->asic_type != CHIP_TOPAZ))
889                 release_firmware(adev->gfx.mec2_fw);
890         adev->gfx.mec2_fw = NULL;
891
892         kfree(adev->gfx.rlc.register_list_format);
893 }
894
895 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
896 {
897         const char *chip_name;
898         char fw_name[30];
899         int err;
900         struct amdgpu_firmware_info *info = NULL;
901         const struct common_firmware_header *header = NULL;
902         const struct gfx_firmware_header_v1_0 *cp_hdr;
903         const struct rlc_firmware_header_v2_0 *rlc_hdr;
904         unsigned int *tmp = NULL, i;
905
906         DRM_DEBUG("\n");
907
908         switch (adev->asic_type) {
909         case CHIP_TOPAZ:
910                 chip_name = "topaz";
911                 break;
912         case CHIP_TONGA:
913                 chip_name = "tonga";
914                 break;
915         case CHIP_CARRIZO:
916                 chip_name = "carrizo";
917                 break;
918         case CHIP_FIJI:
919                 chip_name = "fiji";
920                 break;
921         case CHIP_POLARIS11:
922                 chip_name = "polaris11";
923                 break;
924         case CHIP_POLARIS10:
925                 chip_name = "polaris10";
926                 break;
927         case CHIP_POLARIS12:
928                 chip_name = "polaris12";
929                 break;
930         case CHIP_STONEY:
931                 chip_name = "stoney";
932                 break;
933         default:
934                 BUG();
935         }
936
937         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
938                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
939                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
940                 if (err == -ENOENT) {
941                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
942                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
943                 }
944         } else {
945                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
946                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
947         }
948         if (err)
949                 goto out;
950         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
951         if (err)
952                 goto out;
953         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
954         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
955         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
956
957         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
958                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
959                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
960                 if (err == -ENOENT) {
961                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
962                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
963                 }
964         } else {
965                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
966                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
967         }
968         if (err)
969                 goto out;
970         err = amdgpu_ucode_validate(adev->gfx.me_fw);
971         if (err)
972                 goto out;
973         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
974         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
975
976         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
977
978         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
979                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
980                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
981                 if (err == -ENOENT) {
982                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
983                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
984                 }
985         } else {
986                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
987                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
988         }
989         if (err)
990                 goto out;
991         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
992         if (err)
993                 goto out;
994         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
995         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
996         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
997
998         /*
999          * Support for MCBP/Virtualization in combination with chained IBs is
1000          * formal released on feature version #46
1001          */
1002         if (adev->gfx.ce_feature_version >= 46 &&
1003             adev->gfx.pfp_feature_version >= 46) {
1004                 adev->virt.chained_ib_support = true;
1005                 DRM_INFO("Chained IB support enabled!\n");
1006         } else
1007                 adev->virt.chained_ib_support = false;
1008
1009         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1010         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1011         if (err)
1012                 goto out;
1013         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1014         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1015         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1016         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1017
1018         adev->gfx.rlc.save_and_restore_offset =
1019                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1020         adev->gfx.rlc.clear_state_descriptor_offset =
1021                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1022         adev->gfx.rlc.avail_scratch_ram_locations =
1023                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1024         adev->gfx.rlc.reg_restore_list_size =
1025                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1026         adev->gfx.rlc.reg_list_format_start =
1027                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1028         adev->gfx.rlc.reg_list_format_separate_start =
1029                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1030         adev->gfx.rlc.starting_offsets_start =
1031                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1032         adev->gfx.rlc.reg_list_format_size_bytes =
1033                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1034         adev->gfx.rlc.reg_list_size_bytes =
1035                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1036
1037         adev->gfx.rlc.register_list_format =
1038                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1039                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1040
1041         if (!adev->gfx.rlc.register_list_format) {
1042                 err = -ENOMEM;
1043                 goto out;
1044         }
1045
1046         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1047                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1048         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1049                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1050
1051         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1052
1053         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1054                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1055         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1056                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1057
1058         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1059                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1060                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1061                 if (err == -ENOENT) {
1062                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1063                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1064                 }
1065         } else {
1066                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1067                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1068         }
1069         if (err)
1070                 goto out;
1071         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1072         if (err)
1073                 goto out;
1074         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1075         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1076         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1077
1078         if ((adev->asic_type != CHIP_STONEY) &&
1079             (adev->asic_type != CHIP_TOPAZ)) {
1080                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1081                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1082                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1083                         if (err == -ENOENT) {
1084                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1085                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1086                         }
1087                 } else {
1088                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1089                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1090                 }
1091                 if (!err) {
1092                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1093                         if (err)
1094                                 goto out;
1095                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1096                                 adev->gfx.mec2_fw->data;
1097                         adev->gfx.mec2_fw_version =
1098                                 le32_to_cpu(cp_hdr->header.ucode_version);
1099                         adev->gfx.mec2_feature_version =
1100                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1101                 } else {
1102                         err = 0;
1103                         adev->gfx.mec2_fw = NULL;
1104                 }
1105         }
1106
1107         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1108                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1109                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1110                 info->fw = adev->gfx.pfp_fw;
1111                 header = (const struct common_firmware_header *)info->fw->data;
1112                 adev->firmware.fw_size +=
1113                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1114
1115                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1116                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1117                 info->fw = adev->gfx.me_fw;
1118                 header = (const struct common_firmware_header *)info->fw->data;
1119                 adev->firmware.fw_size +=
1120                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1121
1122                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1123                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1124                 info->fw = adev->gfx.ce_fw;
1125                 header = (const struct common_firmware_header *)info->fw->data;
1126                 adev->firmware.fw_size +=
1127                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1128
1129                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1130                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1131                 info->fw = adev->gfx.rlc_fw;
1132                 header = (const struct common_firmware_header *)info->fw->data;
1133                 adev->firmware.fw_size +=
1134                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1135
1136                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1137                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1138                 info->fw = adev->gfx.mec_fw;
1139                 header = (const struct common_firmware_header *)info->fw->data;
1140                 adev->firmware.fw_size +=
1141                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1142
1143                 /* we need account JT in */
1144                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1145                 adev->firmware.fw_size +=
1146                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1147
1148                 if (amdgpu_sriov_vf(adev)) {
1149                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1150                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1151                         info->fw = adev->gfx.mec_fw;
1152                         adev->firmware.fw_size +=
1153                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1154                 }
1155
1156                 if (adev->gfx.mec2_fw) {
1157                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1158                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1159                         info->fw = adev->gfx.mec2_fw;
1160                         header = (const struct common_firmware_header *)info->fw->data;
1161                         adev->firmware.fw_size +=
1162                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1163                 }
1164
1165         }
1166
1167 out:
1168         if (err) {
1169                 dev_err(adev->dev,
1170                         "gfx8: Failed to load firmware \"%s\"\n",
1171                         fw_name);
1172                 release_firmware(adev->gfx.pfp_fw);
1173                 adev->gfx.pfp_fw = NULL;
1174                 release_firmware(adev->gfx.me_fw);
1175                 adev->gfx.me_fw = NULL;
1176                 release_firmware(adev->gfx.ce_fw);
1177                 adev->gfx.ce_fw = NULL;
1178                 release_firmware(adev->gfx.rlc_fw);
1179                 adev->gfx.rlc_fw = NULL;
1180                 release_firmware(adev->gfx.mec_fw);
1181                 adev->gfx.mec_fw = NULL;
1182                 release_firmware(adev->gfx.mec2_fw);
1183                 adev->gfx.mec2_fw = NULL;
1184         }
1185         return err;
1186 }
1187
1188 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1189                                     volatile u32 *buffer)
1190 {
1191         u32 count = 0, i;
1192         const struct cs_section_def *sect = NULL;
1193         const struct cs_extent_def *ext = NULL;
1194
1195         if (adev->gfx.rlc.cs_data == NULL)
1196                 return;
1197         if (buffer == NULL)
1198                 return;
1199
1200         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1201         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1202
1203         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1204         buffer[count++] = cpu_to_le32(0x80000000);
1205         buffer[count++] = cpu_to_le32(0x80000000);
1206
1207         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1208                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1209                         if (sect->id == SECT_CONTEXT) {
1210                                 buffer[count++] =
1211                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1212                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1213                                                 PACKET3_SET_CONTEXT_REG_START);
1214                                 for (i = 0; i < ext->reg_count; i++)
1215                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1216                         } else {
1217                                 return;
1218                         }
1219                 }
1220         }
1221
1222         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1223         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1224                         PACKET3_SET_CONTEXT_REG_START);
1225         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1226         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1227
1228         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1229         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1230
1231         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1232         buffer[count++] = cpu_to_le32(0);
1233 }
1234
1235 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1236 {
1237         const __le32 *fw_data;
1238         volatile u32 *dst_ptr;
1239         int me, i, max_me = 4;
1240         u32 bo_offset = 0;
1241         u32 table_offset, table_size;
1242
1243         if (adev->asic_type == CHIP_CARRIZO)
1244                 max_me = 5;
1245
1246         /* write the cp table buffer */
1247         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1248         for (me = 0; me < max_me; me++) {
1249                 if (me == 0) {
1250                         const struct gfx_firmware_header_v1_0 *hdr =
1251                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1252                         fw_data = (const __le32 *)
1253                                 (adev->gfx.ce_fw->data +
1254                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1255                         table_offset = le32_to_cpu(hdr->jt_offset);
1256                         table_size = le32_to_cpu(hdr->jt_size);
1257                 } else if (me == 1) {
1258                         const struct gfx_firmware_header_v1_0 *hdr =
1259                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1260                         fw_data = (const __le32 *)
1261                                 (adev->gfx.pfp_fw->data +
1262                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1263                         table_offset = le32_to_cpu(hdr->jt_offset);
1264                         table_size = le32_to_cpu(hdr->jt_size);
1265                 } else if (me == 2) {
1266                         const struct gfx_firmware_header_v1_0 *hdr =
1267                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1268                         fw_data = (const __le32 *)
1269                                 (adev->gfx.me_fw->data +
1270                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1271                         table_offset = le32_to_cpu(hdr->jt_offset);
1272                         table_size = le32_to_cpu(hdr->jt_size);
1273                 } else if (me == 3) {
1274                         const struct gfx_firmware_header_v1_0 *hdr =
1275                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1276                         fw_data = (const __le32 *)
1277                                 (adev->gfx.mec_fw->data +
1278                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1279                         table_offset = le32_to_cpu(hdr->jt_offset);
1280                         table_size = le32_to_cpu(hdr->jt_size);
1281                 } else  if (me == 4) {
1282                         const struct gfx_firmware_header_v1_0 *hdr =
1283                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1284                         fw_data = (const __le32 *)
1285                                 (adev->gfx.mec2_fw->data +
1286                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1287                         table_offset = le32_to_cpu(hdr->jt_offset);
1288                         table_size = le32_to_cpu(hdr->jt_size);
1289                 }
1290
1291                 for (i = 0; i < table_size; i ++) {
1292                         dst_ptr[bo_offset + i] =
1293                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1294                 }
1295
1296                 bo_offset += table_size;
1297         }
1298 }
1299
1300 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1301 {
1302         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1303         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
1304 }
1305
1306 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1307 {
1308         volatile u32 *dst_ptr;
1309         u32 dws;
1310         const struct cs_section_def *cs_data;
1311         int r;
1312
1313         adev->gfx.rlc.cs_data = vi_cs_data;
1314
1315         cs_data = adev->gfx.rlc.cs_data;
1316
1317         if (cs_data) {
1318                 /* clear state block */
1319                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1320
1321                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1322                                               AMDGPU_GEM_DOMAIN_VRAM,
1323                                               &adev->gfx.rlc.clear_state_obj,
1324                                               &adev->gfx.rlc.clear_state_gpu_addr,
1325                                               (void **)&adev->gfx.rlc.cs_ptr);
1326                 if (r) {
1327                         dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1328                         gfx_v8_0_rlc_fini(adev);
1329                         return r;
1330                 }
1331
1332                 /* set up the cs buffer */
1333                 dst_ptr = adev->gfx.rlc.cs_ptr;
1334                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1335                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1336                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1337         }
1338
1339         if ((adev->asic_type == CHIP_CARRIZO) ||
1340             (adev->asic_type == CHIP_STONEY)) {
1341                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1342                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1343                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1344                                               &adev->gfx.rlc.cp_table_obj,
1345                                               &adev->gfx.rlc.cp_table_gpu_addr,
1346                                               (void **)&adev->gfx.rlc.cp_table_ptr);
1347                 if (r) {
1348                         dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1349                         return r;
1350                 }
1351
1352                 cz_init_cp_jump_table(adev);
1353
1354                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1355                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1356         }
1357
1358         return 0;
1359 }
1360
1361 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1362 {
1363         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1364 }
1365
1366 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1367 {
1368         int r;
1369         u32 *hpd;
1370         size_t mec_hpd_size;
1371
1372         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1373
1374         /* take ownership of the relevant compute queues */
1375         amdgpu_gfx_compute_queue_acquire(adev);
1376
1377         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1378
1379         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1380                                       AMDGPU_GEM_DOMAIN_GTT,
1381                                       &adev->gfx.mec.hpd_eop_obj,
1382                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1383                                       (void **)&hpd);
1384         if (r) {
1385                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1386                 return r;
1387         }
1388
1389         memset(hpd, 0, mec_hpd_size);
1390
1391         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1392         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1393
1394         return 0;
1395 }
1396
1397 static const u32 vgpr_init_compute_shader[] =
1398 {
1399         0x7e000209, 0x7e020208,
1400         0x7e040207, 0x7e060206,
1401         0x7e080205, 0x7e0a0204,
1402         0x7e0c0203, 0x7e0e0202,
1403         0x7e100201, 0x7e120200,
1404         0x7e140209, 0x7e160208,
1405         0x7e180207, 0x7e1a0206,
1406         0x7e1c0205, 0x7e1e0204,
1407         0x7e200203, 0x7e220202,
1408         0x7e240201, 0x7e260200,
1409         0x7e280209, 0x7e2a0208,
1410         0x7e2c0207, 0x7e2e0206,
1411         0x7e300205, 0x7e320204,
1412         0x7e340203, 0x7e360202,
1413         0x7e380201, 0x7e3a0200,
1414         0x7e3c0209, 0x7e3e0208,
1415         0x7e400207, 0x7e420206,
1416         0x7e440205, 0x7e460204,
1417         0x7e480203, 0x7e4a0202,
1418         0x7e4c0201, 0x7e4e0200,
1419         0x7e500209, 0x7e520208,
1420         0x7e540207, 0x7e560206,
1421         0x7e580205, 0x7e5a0204,
1422         0x7e5c0203, 0x7e5e0202,
1423         0x7e600201, 0x7e620200,
1424         0x7e640209, 0x7e660208,
1425         0x7e680207, 0x7e6a0206,
1426         0x7e6c0205, 0x7e6e0204,
1427         0x7e700203, 0x7e720202,
1428         0x7e740201, 0x7e760200,
1429         0x7e780209, 0x7e7a0208,
1430         0x7e7c0207, 0x7e7e0206,
1431         0xbf8a0000, 0xbf810000,
1432 };
1433
1434 static const u32 sgpr_init_compute_shader[] =
1435 {
1436         0xbe8a0100, 0xbe8c0102,
1437         0xbe8e0104, 0xbe900106,
1438         0xbe920108, 0xbe940100,
1439         0xbe960102, 0xbe980104,
1440         0xbe9a0106, 0xbe9c0108,
1441         0xbe9e0100, 0xbea00102,
1442         0xbea20104, 0xbea40106,
1443         0xbea60108, 0xbea80100,
1444         0xbeaa0102, 0xbeac0104,
1445         0xbeae0106, 0xbeb00108,
1446         0xbeb20100, 0xbeb40102,
1447         0xbeb60104, 0xbeb80106,
1448         0xbeba0108, 0xbebc0100,
1449         0xbebe0102, 0xbec00104,
1450         0xbec20106, 0xbec40108,
1451         0xbec60100, 0xbec80102,
1452         0xbee60004, 0xbee70005,
1453         0xbeea0006, 0xbeeb0007,
1454         0xbee80008, 0xbee90009,
1455         0xbefc0000, 0xbf8a0000,
1456         0xbf810000, 0x00000000,
1457 };
1458
1459 static const u32 vgpr_init_regs[] =
1460 {
1461         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1462         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1463         mmCOMPUTE_NUM_THREAD_X, 256*4,
1464         mmCOMPUTE_NUM_THREAD_Y, 1,
1465         mmCOMPUTE_NUM_THREAD_Z, 1,
1466         mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1467         mmCOMPUTE_PGM_RSRC2, 20,
1468         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1469         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1470         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1471         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1472         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1473         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1474         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1475         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1476         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1477         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1478 };
1479
1480 static const u32 sgpr1_init_regs[] =
1481 {
1482         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1483         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1484         mmCOMPUTE_NUM_THREAD_X, 256*5,
1485         mmCOMPUTE_NUM_THREAD_Y, 1,
1486         mmCOMPUTE_NUM_THREAD_Z, 1,
1487         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1488         mmCOMPUTE_PGM_RSRC2, 20,
1489         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1490         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1491         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1492         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1493         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1494         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1495         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1496         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1497         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1498         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1499 };
1500
1501 static const u32 sgpr2_init_regs[] =
1502 {
1503         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1504         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1505         mmCOMPUTE_NUM_THREAD_X, 256*5,
1506         mmCOMPUTE_NUM_THREAD_Y, 1,
1507         mmCOMPUTE_NUM_THREAD_Z, 1,
1508         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1509         mmCOMPUTE_PGM_RSRC2, 20,
1510         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1511         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1512         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1513         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1514         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1515         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1516         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1517         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1518         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1519         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1520 };
1521
1522 static const u32 sec_ded_counter_registers[] =
1523 {
1524         mmCPC_EDC_ATC_CNT,
1525         mmCPC_EDC_SCRATCH_CNT,
1526         mmCPC_EDC_UCODE_CNT,
1527         mmCPF_EDC_ATC_CNT,
1528         mmCPF_EDC_ROQ_CNT,
1529         mmCPF_EDC_TAG_CNT,
1530         mmCPG_EDC_ATC_CNT,
1531         mmCPG_EDC_DMA_CNT,
1532         mmCPG_EDC_TAG_CNT,
1533         mmDC_EDC_CSINVOC_CNT,
1534         mmDC_EDC_RESTORE_CNT,
1535         mmDC_EDC_STATE_CNT,
1536         mmGDS_EDC_CNT,
1537         mmGDS_EDC_GRBM_CNT,
1538         mmGDS_EDC_OA_DED,
1539         mmSPI_EDC_CNT,
1540         mmSQC_ATC_EDC_GATCL1_CNT,
1541         mmSQC_EDC_CNT,
1542         mmSQ_EDC_DED_CNT,
1543         mmSQ_EDC_INFO,
1544         mmSQ_EDC_SEC_CNT,
1545         mmTCC_EDC_CNT,
1546         mmTCP_ATC_EDC_GATCL1_CNT,
1547         mmTCP_EDC_CNT,
1548         mmTD_EDC_CNT
1549 };
1550
1551 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1552 {
1553         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1554         struct amdgpu_ib ib;
1555         struct dma_fence *f = NULL;
1556         int r, i;
1557         u32 tmp;
1558         unsigned total_size, vgpr_offset, sgpr_offset;
1559         u64 gpu_addr;
1560
1561         /* only supported on CZ */
1562         if (adev->asic_type != CHIP_CARRIZO)
1563                 return 0;
1564
1565         /* bail if the compute ring is not ready */
1566         if (!ring->ready)
1567                 return 0;
1568
1569         tmp = RREG32(mmGB_EDC_MODE);
1570         WREG32(mmGB_EDC_MODE, 0);
1571
1572         total_size =
1573                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1574         total_size +=
1575                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1576         total_size +=
1577                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1578         total_size = ALIGN(total_size, 256);
1579         vgpr_offset = total_size;
1580         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1581         sgpr_offset = total_size;
1582         total_size += sizeof(sgpr_init_compute_shader);
1583
1584         /* allocate an indirect buffer to put the commands in */
1585         memset(&ib, 0, sizeof(ib));
1586         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1587         if (r) {
1588                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1589                 return r;
1590         }
1591
1592         /* load the compute shaders */
1593         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1594                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1595
1596         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1597                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1598
1599         /* init the ib length to 0 */
1600         ib.length_dw = 0;
1601
1602         /* VGPR */
1603         /* write the register state for the compute dispatch */
1604         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1605                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1606                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1607                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1608         }
1609         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1610         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1611         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1612         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1613         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1614         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1615
1616         /* write dispatch packet */
1617         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1618         ib.ptr[ib.length_dw++] = 8; /* x */
1619         ib.ptr[ib.length_dw++] = 1; /* y */
1620         ib.ptr[ib.length_dw++] = 1; /* z */
1621         ib.ptr[ib.length_dw++] =
1622                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1623
1624         /* write CS partial flush packet */
1625         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1626         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1627
1628         /* SGPR1 */
1629         /* write the register state for the compute dispatch */
1630         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1631                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1632                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1633                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1634         }
1635         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1636         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1637         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1638         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1639         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1640         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1641
1642         /* write dispatch packet */
1643         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1644         ib.ptr[ib.length_dw++] = 8; /* x */
1645         ib.ptr[ib.length_dw++] = 1; /* y */
1646         ib.ptr[ib.length_dw++] = 1; /* z */
1647         ib.ptr[ib.length_dw++] =
1648                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1649
1650         /* write CS partial flush packet */
1651         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1652         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1653
1654         /* SGPR2 */
1655         /* write the register state for the compute dispatch */
1656         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1657                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1658                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1659                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1660         }
1661         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1662         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1663         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1664         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1665         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1666         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1667
1668         /* write dispatch packet */
1669         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1670         ib.ptr[ib.length_dw++] = 8; /* x */
1671         ib.ptr[ib.length_dw++] = 1; /* y */
1672         ib.ptr[ib.length_dw++] = 1; /* z */
1673         ib.ptr[ib.length_dw++] =
1674                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1675
1676         /* write CS partial flush packet */
1677         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1678         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1679
1680         /* shedule the ib on the ring */
1681         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1682         if (r) {
1683                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1684                 goto fail;
1685         }
1686
1687         /* wait for the GPU to finish processing the IB */
1688         r = dma_fence_wait(f, false);
1689         if (r) {
1690                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1691                 goto fail;
1692         }
1693
1694         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1695         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1696         WREG32(mmGB_EDC_MODE, tmp);
1697
1698         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1699         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1700         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1701
1702
1703         /* read back registers to clear the counters */
1704         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1705                 RREG32(sec_ded_counter_registers[i]);
1706
1707 fail:
1708         amdgpu_ib_free(adev, &ib, NULL);
1709         dma_fence_put(f);
1710
1711         return r;
1712 }
1713
1714 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1715 {
1716         u32 gb_addr_config;
1717         u32 mc_shared_chmap, mc_arb_ramcfg;
1718         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1719         u32 tmp;
1720         int ret;
1721
1722         switch (adev->asic_type) {
1723         case CHIP_TOPAZ:
1724                 adev->gfx.config.max_shader_engines = 1;
1725                 adev->gfx.config.max_tile_pipes = 2;
1726                 adev->gfx.config.max_cu_per_sh = 6;
1727                 adev->gfx.config.max_sh_per_se = 1;
1728                 adev->gfx.config.max_backends_per_se = 2;
1729                 adev->gfx.config.max_texture_channel_caches = 2;
1730                 adev->gfx.config.max_gprs = 256;
1731                 adev->gfx.config.max_gs_threads = 32;
1732                 adev->gfx.config.max_hw_contexts = 8;
1733
1734                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1735                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1736                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1737                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1738                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1739                 break;
1740         case CHIP_FIJI:
1741                 adev->gfx.config.max_shader_engines = 4;
1742                 adev->gfx.config.max_tile_pipes = 16;
1743                 adev->gfx.config.max_cu_per_sh = 16;
1744                 adev->gfx.config.max_sh_per_se = 1;
1745                 adev->gfx.config.max_backends_per_se = 4;
1746                 adev->gfx.config.max_texture_channel_caches = 16;
1747                 adev->gfx.config.max_gprs = 256;
1748                 adev->gfx.config.max_gs_threads = 32;
1749                 adev->gfx.config.max_hw_contexts = 8;
1750
1751                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1752                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1753                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1754                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1755                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1756                 break;
1757         case CHIP_POLARIS11:
1758         case CHIP_POLARIS12:
1759                 ret = amdgpu_atombios_get_gfx_info(adev);
1760                 if (ret)
1761                         return ret;
1762                 adev->gfx.config.max_gprs = 256;
1763                 adev->gfx.config.max_gs_threads = 32;
1764                 adev->gfx.config.max_hw_contexts = 8;
1765
1766                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1767                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1768                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1769                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1770                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1771                 break;
1772         case CHIP_POLARIS10:
1773                 ret = amdgpu_atombios_get_gfx_info(adev);
1774                 if (ret)
1775                         return ret;
1776                 adev->gfx.config.max_gprs = 256;
1777                 adev->gfx.config.max_gs_threads = 32;
1778                 adev->gfx.config.max_hw_contexts = 8;
1779
1780                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1781                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1782                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1783                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1784                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1785                 break;
1786         case CHIP_TONGA:
1787                 adev->gfx.config.max_shader_engines = 4;
1788                 adev->gfx.config.max_tile_pipes = 8;
1789                 adev->gfx.config.max_cu_per_sh = 8;
1790                 adev->gfx.config.max_sh_per_se = 1;
1791                 adev->gfx.config.max_backends_per_se = 2;
1792                 adev->gfx.config.max_texture_channel_caches = 8;
1793                 adev->gfx.config.max_gprs = 256;
1794                 adev->gfx.config.max_gs_threads = 32;
1795                 adev->gfx.config.max_hw_contexts = 8;
1796
1797                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1798                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1799                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1800                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1801                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1802                 break;
1803         case CHIP_CARRIZO:
1804                 adev->gfx.config.max_shader_engines = 1;
1805                 adev->gfx.config.max_tile_pipes = 2;
1806                 adev->gfx.config.max_sh_per_se = 1;
1807                 adev->gfx.config.max_backends_per_se = 2;
1808                 adev->gfx.config.max_cu_per_sh = 8;
1809                 adev->gfx.config.max_texture_channel_caches = 2;
1810                 adev->gfx.config.max_gprs = 256;
1811                 adev->gfx.config.max_gs_threads = 32;
1812                 adev->gfx.config.max_hw_contexts = 8;
1813
1814                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1815                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1816                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1817                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1818                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1819                 break;
1820         case CHIP_STONEY:
1821                 adev->gfx.config.max_shader_engines = 1;
1822                 adev->gfx.config.max_tile_pipes = 2;
1823                 adev->gfx.config.max_sh_per_se = 1;
1824                 adev->gfx.config.max_backends_per_se = 1;
1825                 adev->gfx.config.max_cu_per_sh = 3;
1826                 adev->gfx.config.max_texture_channel_caches = 2;
1827                 adev->gfx.config.max_gprs = 256;
1828                 adev->gfx.config.max_gs_threads = 16;
1829                 adev->gfx.config.max_hw_contexts = 8;
1830
1831                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1832                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1833                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1834                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1835                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1836                 break;
1837         default:
1838                 adev->gfx.config.max_shader_engines = 2;
1839                 adev->gfx.config.max_tile_pipes = 4;
1840                 adev->gfx.config.max_cu_per_sh = 2;
1841                 adev->gfx.config.max_sh_per_se = 1;
1842                 adev->gfx.config.max_backends_per_se = 2;
1843                 adev->gfx.config.max_texture_channel_caches = 4;
1844                 adev->gfx.config.max_gprs = 256;
1845                 adev->gfx.config.max_gs_threads = 32;
1846                 adev->gfx.config.max_hw_contexts = 8;
1847
1848                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1849                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1850                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1851                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1852                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1853                 break;
1854         }
1855
1856         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1857         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1858         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1859
1860         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1861         adev->gfx.config.mem_max_burst_length_bytes = 256;
1862         if (adev->flags & AMD_IS_APU) {
1863                 /* Get memory bank mapping mode. */
1864                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1865                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1866                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1867
1868                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1869                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1870                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1871
1872                 /* Validate settings in case only one DIMM installed. */
1873                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1874                         dimm00_addr_map = 0;
1875                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1876                         dimm01_addr_map = 0;
1877                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1878                         dimm10_addr_map = 0;
1879                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1880                         dimm11_addr_map = 0;
1881
1882                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1883                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1884                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1885                         adev->gfx.config.mem_row_size_in_kb = 2;
1886                 else
1887                         adev->gfx.config.mem_row_size_in_kb = 1;
1888         } else {
1889                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1890                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1891                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1892                         adev->gfx.config.mem_row_size_in_kb = 4;
1893         }
1894
1895         adev->gfx.config.shader_engine_tile_size = 32;
1896         adev->gfx.config.num_gpus = 1;
1897         adev->gfx.config.multi_gpu_tile_size = 64;
1898
1899         /* fix up row size */
1900         switch (adev->gfx.config.mem_row_size_in_kb) {
1901         case 1:
1902         default:
1903                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1904                 break;
1905         case 2:
1906                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1907                 break;
1908         case 4:
1909                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1910                 break;
1911         }
1912         adev->gfx.config.gb_addr_config = gb_addr_config;
1913
1914         return 0;
1915 }
1916
1917 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1918                                         int mec, int pipe, int queue)
1919 {
1920         int r;
1921         unsigned irq_type;
1922         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1923
1924         ring = &adev->gfx.compute_ring[ring_id];
1925
1926         /* mec0 is me1 */
1927         ring->me = mec + 1;
1928         ring->pipe = pipe;
1929         ring->queue = queue;
1930
1931         ring->ring_obj = NULL;
1932         ring->use_doorbell = true;
1933         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1934         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1935                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1936         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1937
1938         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1939                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1940                 + ring->pipe;
1941
1942         /* type-2 packets are deprecated on MEC, use type-3 instead */
1943         r = amdgpu_ring_init(adev, ring, 1024,
1944                         &adev->gfx.eop_irq, irq_type);
1945         if (r)
1946                 return r;
1947
1948
1949         return 0;
1950 }
1951
1952 static int gfx_v8_0_sw_init(void *handle)
1953 {
1954         int i, j, k, r, ring_id;
1955         struct amdgpu_ring *ring;
1956         struct amdgpu_kiq *kiq;
1957         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1958
1959         switch (adev->asic_type) {
1960         case CHIP_FIJI:
1961         case CHIP_TONGA:
1962         case CHIP_POLARIS11:
1963         case CHIP_POLARIS12:
1964         case CHIP_POLARIS10:
1965         case CHIP_CARRIZO:
1966                 adev->gfx.mec.num_mec = 2;
1967                 break;
1968         case CHIP_TOPAZ:
1969         case CHIP_STONEY:
1970         default:
1971                 adev->gfx.mec.num_mec = 1;
1972                 break;
1973         }
1974
1975         adev->gfx.mec.num_pipe_per_mec = 4;
1976         adev->gfx.mec.num_queue_per_pipe = 8;
1977
1978         /* KIQ event */
1979         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
1980         if (r)
1981                 return r;
1982
1983         /* EOP Event */
1984         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
1985         if (r)
1986                 return r;
1987
1988         /* Privileged reg */
1989         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
1990                               &adev->gfx.priv_reg_irq);
1991         if (r)
1992                 return r;
1993
1994         /* Privileged inst */
1995         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
1996                               &adev->gfx.priv_inst_irq);
1997         if (r)
1998                 return r;
1999
2000         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2001
2002         gfx_v8_0_scratch_init(adev);
2003
2004         r = gfx_v8_0_init_microcode(adev);
2005         if (r) {
2006                 DRM_ERROR("Failed to load gfx firmware!\n");
2007                 return r;
2008         }
2009
2010         r = gfx_v8_0_rlc_init(adev);
2011         if (r) {
2012                 DRM_ERROR("Failed to init rlc BOs!\n");
2013                 return r;
2014         }
2015
2016         r = gfx_v8_0_mec_init(adev);
2017         if (r) {
2018                 DRM_ERROR("Failed to init MEC BOs!\n");
2019                 return r;
2020         }
2021
2022         /* set up the gfx ring */
2023         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2024                 ring = &adev->gfx.gfx_ring[i];
2025                 ring->ring_obj = NULL;
2026                 sprintf(ring->name, "gfx");
2027                 /* no gfx doorbells on iceland */
2028                 if (adev->asic_type != CHIP_TOPAZ) {
2029                         ring->use_doorbell = true;
2030                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2031                 }
2032
2033                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2034                                      AMDGPU_CP_IRQ_GFX_EOP);
2035                 if (r)
2036                         return r;
2037         }
2038
2039
2040         /* set up the compute queues - allocate horizontally across pipes */
2041         ring_id = 0;
2042         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2043                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2044                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2045                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2046                                         continue;
2047
2048                                 r = gfx_v8_0_compute_ring_init(adev,
2049                                                                 ring_id,
2050                                                                 i, k, j);
2051                                 if (r)
2052                                         return r;
2053
2054                                 ring_id++;
2055                         }
2056                 }
2057         }
2058
2059         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2060         if (r) {
2061                 DRM_ERROR("Failed to init KIQ BOs!\n");
2062                 return r;
2063         }
2064
2065         kiq = &adev->gfx.kiq;
2066         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2067         if (r)
2068                 return r;
2069
2070         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2071         r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2072         if (r)
2073                 return r;
2074
2075         /* reserve GDS, GWS and OA resource for gfx */
2076         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2077                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2078                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2079         if (r)
2080                 return r;
2081
2082         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2083                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2084                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2085         if (r)
2086                 return r;
2087
2088         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2089                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2090                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2091         if (r)
2092                 return r;
2093
2094         adev->gfx.ce_ram_size = 0x8000;
2095
2096         r = gfx_v8_0_gpu_early_init(adev);
2097         if (r)
2098                 return r;
2099
2100         return 0;
2101 }
2102
2103 static int gfx_v8_0_sw_fini(void *handle)
2104 {
2105         int i;
2106         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2107
2108         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2109         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2110         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2111
2112         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2113                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2114         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2115                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2116
2117         amdgpu_gfx_compute_mqd_sw_fini(adev);
2118         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2119         amdgpu_gfx_kiq_fini(adev);
2120
2121         gfx_v8_0_mec_fini(adev);
2122         gfx_v8_0_rlc_fini(adev);
2123         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2124                                 &adev->gfx.rlc.clear_state_gpu_addr,
2125                                 (void **)&adev->gfx.rlc.cs_ptr);
2126         if ((adev->asic_type == CHIP_CARRIZO) ||
2127             (adev->asic_type == CHIP_STONEY)) {
2128                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2129                                 &adev->gfx.rlc.cp_table_gpu_addr,
2130                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2131         }
2132         gfx_v8_0_free_microcode(adev);
2133
2134         return 0;
2135 }
2136
2137 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2138 {
2139         uint32_t *modearray, *mod2array;
2140         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2141         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2142         u32 reg_offset;
2143
2144         modearray = adev->gfx.config.tile_mode_array;
2145         mod2array = adev->gfx.config.macrotile_mode_array;
2146
2147         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2148                 modearray[reg_offset] = 0;
2149
2150         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2151                 mod2array[reg_offset] = 0;
2152
2153         switch (adev->asic_type) {
2154         case CHIP_TOPAZ:
2155                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2156                                 PIPE_CONFIG(ADDR_SURF_P2) |
2157                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2158                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2159                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160                                 PIPE_CONFIG(ADDR_SURF_P2) |
2161                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2162                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2163                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2164                                 PIPE_CONFIG(ADDR_SURF_P2) |
2165                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2166                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2167                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2168                                 PIPE_CONFIG(ADDR_SURF_P2) |
2169                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2170                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2171                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2172                                 PIPE_CONFIG(ADDR_SURF_P2) |
2173                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2174                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2175                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2176                                 PIPE_CONFIG(ADDR_SURF_P2) |
2177                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2178                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2179                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2180                                 PIPE_CONFIG(ADDR_SURF_P2) |
2181                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2182                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2183                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2184                                 PIPE_CONFIG(ADDR_SURF_P2));
2185                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2186                                 PIPE_CONFIG(ADDR_SURF_P2) |
2187                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2188                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2190                                  PIPE_CONFIG(ADDR_SURF_P2) |
2191                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2192                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2193                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2194                                  PIPE_CONFIG(ADDR_SURF_P2) |
2195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2197                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2198                                  PIPE_CONFIG(ADDR_SURF_P2) |
2199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2201                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2202                                  PIPE_CONFIG(ADDR_SURF_P2) |
2203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2205                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2206                                  PIPE_CONFIG(ADDR_SURF_P2) |
2207                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2208                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2209                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2210                                  PIPE_CONFIG(ADDR_SURF_P2) |
2211                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2212                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2213                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2214                                  PIPE_CONFIG(ADDR_SURF_P2) |
2215                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2216                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2217                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2218                                  PIPE_CONFIG(ADDR_SURF_P2) |
2219                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2220                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2221                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2222                                  PIPE_CONFIG(ADDR_SURF_P2) |
2223                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2224                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2225                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2226                                  PIPE_CONFIG(ADDR_SURF_P2) |
2227                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2228                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2229                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2230                                  PIPE_CONFIG(ADDR_SURF_P2) |
2231                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2232                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2233                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2234                                  PIPE_CONFIG(ADDR_SURF_P2) |
2235                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2236                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2237                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2238                                  PIPE_CONFIG(ADDR_SURF_P2) |
2239                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2240                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2241                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2242                                  PIPE_CONFIG(ADDR_SURF_P2) |
2243                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2244                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2245                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2246                                  PIPE_CONFIG(ADDR_SURF_P2) |
2247                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2248                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2249                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250                                  PIPE_CONFIG(ADDR_SURF_P2) |
2251                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2252                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2253                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2254                                  PIPE_CONFIG(ADDR_SURF_P2) |
2255                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2256                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2257
2258                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2259                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2261                                 NUM_BANKS(ADDR_SURF_8_BANK));
2262                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2263                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2264                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2265                                 NUM_BANKS(ADDR_SURF_8_BANK));
2266                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2267                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2268                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269                                 NUM_BANKS(ADDR_SURF_8_BANK));
2270                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2272                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2273                                 NUM_BANKS(ADDR_SURF_8_BANK));
2274                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2276                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2277                                 NUM_BANKS(ADDR_SURF_8_BANK));
2278                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2280                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2281                                 NUM_BANKS(ADDR_SURF_8_BANK));
2282                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2283                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2284                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2285                                 NUM_BANKS(ADDR_SURF_8_BANK));
2286                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2287                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2288                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2289                                 NUM_BANKS(ADDR_SURF_16_BANK));
2290                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2291                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2292                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2293                                 NUM_BANKS(ADDR_SURF_16_BANK));
2294                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2295                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297                                  NUM_BANKS(ADDR_SURF_16_BANK));
2298                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2299                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2300                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2301                                  NUM_BANKS(ADDR_SURF_16_BANK));
2302                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2303                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2304                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2305                                  NUM_BANKS(ADDR_SURF_16_BANK));
2306                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2307                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2308                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2309                                  NUM_BANKS(ADDR_SURF_16_BANK));
2310                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2311                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2312                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2313                                  NUM_BANKS(ADDR_SURF_8_BANK));
2314
2315                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2316                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2317                             reg_offset != 23)
2318                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2319
2320                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2321                         if (reg_offset != 7)
2322                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2323
2324                 break;
2325         case CHIP_FIJI:
2326                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2329                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2333                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2337                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2338                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2339                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2341                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2342                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2344                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2345                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2346                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2347                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2349                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2350                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2351                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2353                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2354                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2355                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2356                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2357                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2358                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2359                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2360                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2361                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2363                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2364                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2365                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2369                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2372                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2373                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2376                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2377                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2380                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2383                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2384                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2385                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2389                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2391                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2392                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2393                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2395                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2396                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2397                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2401                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2403                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2405                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2407                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2408                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2409                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2411                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2412                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2413                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2415                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2416                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2417                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2418                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2419                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2420                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2421                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2424                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2425                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2427                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2428                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2429                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2431                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2432                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2433                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2439                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2442                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2443                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2444                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2445                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2446                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2447                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2448
2449                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                                 NUM_BANKS(ADDR_SURF_8_BANK));
2453                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2455                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456                                 NUM_BANKS(ADDR_SURF_8_BANK));
2457                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2459                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460                                 NUM_BANKS(ADDR_SURF_8_BANK));
2461                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2463                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464                                 NUM_BANKS(ADDR_SURF_8_BANK));
2465                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                                 NUM_BANKS(ADDR_SURF_8_BANK));
2469                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472                                 NUM_BANKS(ADDR_SURF_8_BANK));
2473                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476                                 NUM_BANKS(ADDR_SURF_8_BANK));
2477                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2479                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480                                 NUM_BANKS(ADDR_SURF_8_BANK));
2481                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2483                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484                                 NUM_BANKS(ADDR_SURF_8_BANK));
2485                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2487                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488                                  NUM_BANKS(ADDR_SURF_8_BANK));
2489                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492                                  NUM_BANKS(ADDR_SURF_8_BANK));
2493                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2495                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2496                                  NUM_BANKS(ADDR_SURF_8_BANK));
2497                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2500                                  NUM_BANKS(ADDR_SURF_8_BANK));
2501                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504                                  NUM_BANKS(ADDR_SURF_4_BANK));
2505
2506                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2507                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2508
2509                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2510                         if (reg_offset != 7)
2511                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2512
2513                 break;
2514         case CHIP_TONGA:
2515                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2518                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2520                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2522                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2525                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2526                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2527                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2528                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2530                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2531                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2533                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2534                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2535                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2536                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2537                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2538                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2539                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2540                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2541                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2542                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2543                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2544                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2545                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2546                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2547                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2548                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2549                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2550                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2552                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2556                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2558                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2560                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2561                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2562                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2563                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2564                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2565                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2566                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2569                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2570                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2572                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2573                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2574                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2576                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2577                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2578                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2580                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2581                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2582                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2583                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2584                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2585                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2586                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2588                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2589                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2590                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2592                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2593                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2594                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2596                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2597                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2598                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2600                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2601                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2602                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2604                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2605                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2606                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2607                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2608                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2609                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2610                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2612                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2613                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2614                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2616                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2617                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2618                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2620                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2621                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2622                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2624                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2625                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2626                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2628                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2629                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2632                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2633                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2636                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2637
2638                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2640                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2641                                 NUM_BANKS(ADDR_SURF_16_BANK));
2642                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2644                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2645                                 NUM_BANKS(ADDR_SURF_16_BANK));
2646                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2648                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2649                                 NUM_BANKS(ADDR_SURF_16_BANK));
2650                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2652                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2653                                 NUM_BANKS(ADDR_SURF_16_BANK));
2654                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2656                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2657                                 NUM_BANKS(ADDR_SURF_16_BANK));
2658                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2660                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2661                                 NUM_BANKS(ADDR_SURF_16_BANK));
2662                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2664                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2665                                 NUM_BANKS(ADDR_SURF_16_BANK));
2666                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2668                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2669                                 NUM_BANKS(ADDR_SURF_16_BANK));
2670                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2672                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2673                                 NUM_BANKS(ADDR_SURF_16_BANK));
2674                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2676                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2677                                  NUM_BANKS(ADDR_SURF_16_BANK));
2678                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2681                                  NUM_BANKS(ADDR_SURF_16_BANK));
2682                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2683                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2684                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2685                                  NUM_BANKS(ADDR_SURF_8_BANK));
2686                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2688                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2689                                  NUM_BANKS(ADDR_SURF_4_BANK));
2690                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2692                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2693                                  NUM_BANKS(ADDR_SURF_4_BANK));
2694
2695                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2696                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2697
2698                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2699                         if (reg_offset != 7)
2700                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2701
2702                 break;
2703         case CHIP_POLARIS11:
2704         case CHIP_POLARIS12:
2705                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2708                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2712                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2713                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2716                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2717                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2718                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2720                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2721                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2724                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2725                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2726                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2727                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2728                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2729                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2730                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2732                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2733                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2734                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2736                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2737                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2738                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2739                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2747                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2751                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2755                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2759                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2763                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2767                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2771                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2775                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2776                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2778                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2780                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2782                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2784                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2786                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2787                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2788                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2790                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2791                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2792                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2794                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2795                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2796                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2798                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2799                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2800                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2802                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2803                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2804                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2806                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2807                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2808                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2810                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2811                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2814                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2816                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2818                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2819                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2822                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2823                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2826                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2827
2828                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2830                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2831                                 NUM_BANKS(ADDR_SURF_16_BANK));
2832
2833                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2835                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2836                                 NUM_BANKS(ADDR_SURF_16_BANK));
2837
2838                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2840                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2841                                 NUM_BANKS(ADDR_SURF_16_BANK));
2842
2843                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2845                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846                                 NUM_BANKS(ADDR_SURF_16_BANK));
2847
2848                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851                                 NUM_BANKS(ADDR_SURF_16_BANK));
2852
2853                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2855                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2856                                 NUM_BANKS(ADDR_SURF_16_BANK));
2857
2858                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2860                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2861                                 NUM_BANKS(ADDR_SURF_16_BANK));
2862
2863                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2864                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2865                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2866                                 NUM_BANKS(ADDR_SURF_16_BANK));
2867
2868                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2869                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2870                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871                                 NUM_BANKS(ADDR_SURF_16_BANK));
2872
2873                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2874                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2875                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2876                                 NUM_BANKS(ADDR_SURF_16_BANK));
2877
2878                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2879                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2880                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2881                                 NUM_BANKS(ADDR_SURF_16_BANK));
2882
2883                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2884                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2885                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2886                                 NUM_BANKS(ADDR_SURF_16_BANK));
2887
2888                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2889                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2890                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2891                                 NUM_BANKS(ADDR_SURF_8_BANK));
2892
2893                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2894                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2895                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2896                                 NUM_BANKS(ADDR_SURF_4_BANK));
2897
2898                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2899                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2900
2901                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2902                         if (reg_offset != 7)
2903                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2904
2905                 break;
2906         case CHIP_POLARIS10:
2907                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2923                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2924                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2925                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2926                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2927                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2928                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2929                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2930                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2931                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2932                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2933                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2934                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2935                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2936                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2937                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2938                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2939                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2940                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2941                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2942                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2946                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2957                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2961                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2962                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2965                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2969                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2970                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2973                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2974                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2977                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2978                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2980                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2982                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2984                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2986                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2990                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2994                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2996                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2997                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2998                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2999                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3000                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3001                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3002                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3004                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3005                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3006                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3008                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3009                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3010                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3012                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3013                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3014                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3016                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3018                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3020                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3023                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3024                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3025                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3026                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3027                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3028                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3029
3030                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3032                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3033                                 NUM_BANKS(ADDR_SURF_16_BANK));
3034
3035                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3037                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3038                                 NUM_BANKS(ADDR_SURF_16_BANK));
3039
3040                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3042                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3043                                 NUM_BANKS(ADDR_SURF_16_BANK));
3044
3045                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3047                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3048                                 NUM_BANKS(ADDR_SURF_16_BANK));
3049
3050                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3052                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3053                                 NUM_BANKS(ADDR_SURF_16_BANK));
3054
3055                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3057                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3058                                 NUM_BANKS(ADDR_SURF_16_BANK));
3059
3060                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3063                                 NUM_BANKS(ADDR_SURF_16_BANK));
3064
3065                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3067                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3068                                 NUM_BANKS(ADDR_SURF_16_BANK));
3069
3070                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3071                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3072                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3073                                 NUM_BANKS(ADDR_SURF_16_BANK));
3074
3075                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3077                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3078                                 NUM_BANKS(ADDR_SURF_16_BANK));
3079
3080                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3082                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3083                                 NUM_BANKS(ADDR_SURF_16_BANK));
3084
3085                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3086                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3087                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3088                                 NUM_BANKS(ADDR_SURF_8_BANK));
3089
3090                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3091                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3092                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3093                                 NUM_BANKS(ADDR_SURF_4_BANK));
3094
3095                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3096                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3097                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3098                                 NUM_BANKS(ADDR_SURF_4_BANK));
3099
3100                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3101                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3102
3103                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3104                         if (reg_offset != 7)
3105                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3106
3107                 break;
3108         case CHIP_STONEY:
3109                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3110                                 PIPE_CONFIG(ADDR_SURF_P2) |
3111                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3112                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3114                                 PIPE_CONFIG(ADDR_SURF_P2) |
3115                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3116                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3118                                 PIPE_CONFIG(ADDR_SURF_P2) |
3119                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3120                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3121                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122                                 PIPE_CONFIG(ADDR_SURF_P2) |
3123                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3124                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3125                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3126                                 PIPE_CONFIG(ADDR_SURF_P2) |
3127                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3128                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3129                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3130                                 PIPE_CONFIG(ADDR_SURF_P2) |
3131                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3132                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3133                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3134                                 PIPE_CONFIG(ADDR_SURF_P2) |
3135                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3136                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3137                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3138                                 PIPE_CONFIG(ADDR_SURF_P2));
3139                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3140                                 PIPE_CONFIG(ADDR_SURF_P2) |
3141                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3142                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3147                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3151                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3155                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3159                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3163                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3167                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3168                                  PIPE_CONFIG(ADDR_SURF_P2) |
3169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3172                                  PIPE_CONFIG(ADDR_SURF_P2) |
3173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3176                                  PIPE_CONFIG(ADDR_SURF_P2) |
3177                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3180                                  PIPE_CONFIG(ADDR_SURF_P2) |
3181                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3182                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3183                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3184                                  PIPE_CONFIG(ADDR_SURF_P2) |
3185                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3186                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3187                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3188                                  PIPE_CONFIG(ADDR_SURF_P2) |
3189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3191                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3192                                  PIPE_CONFIG(ADDR_SURF_P2) |
3193                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3194                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3195                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3196                                  PIPE_CONFIG(ADDR_SURF_P2) |
3197                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3198                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3199                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3200                                  PIPE_CONFIG(ADDR_SURF_P2) |
3201                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3202                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3203                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204                                  PIPE_CONFIG(ADDR_SURF_P2) |
3205                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3206                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3207                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3208                                  PIPE_CONFIG(ADDR_SURF_P2) |
3209                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3210                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3211
3212                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3214                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215                                 NUM_BANKS(ADDR_SURF_8_BANK));
3216                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3218                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3219                                 NUM_BANKS(ADDR_SURF_8_BANK));
3220                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3222                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3223                                 NUM_BANKS(ADDR_SURF_8_BANK));
3224                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3227                                 NUM_BANKS(ADDR_SURF_8_BANK));
3228                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3230                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3231                                 NUM_BANKS(ADDR_SURF_8_BANK));
3232                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3233                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3234                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3235                                 NUM_BANKS(ADDR_SURF_8_BANK));
3236                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3238                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3239                                 NUM_BANKS(ADDR_SURF_8_BANK));
3240                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3241                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3242                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243                                 NUM_BANKS(ADDR_SURF_16_BANK));
3244                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3245                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3246                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247                                 NUM_BANKS(ADDR_SURF_16_BANK));
3248                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3249                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3250                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3251                                  NUM_BANKS(ADDR_SURF_16_BANK));
3252                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3253                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3254                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3255                                  NUM_BANKS(ADDR_SURF_16_BANK));
3256                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3257                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3258                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259                                  NUM_BANKS(ADDR_SURF_16_BANK));
3260                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3261                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3262                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3263                                  NUM_BANKS(ADDR_SURF_16_BANK));
3264                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3265                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3266                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3267                                  NUM_BANKS(ADDR_SURF_8_BANK));
3268
3269                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3270                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3271                             reg_offset != 23)
3272                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3273
3274                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3275                         if (reg_offset != 7)
3276                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3277
3278                 break;
3279         default:
3280                 dev_warn(adev->dev,
3281                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3282                          adev->asic_type);
3283
3284         case CHIP_CARRIZO:
3285                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3286                                 PIPE_CONFIG(ADDR_SURF_P2) |
3287                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3288                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3289                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3290                                 PIPE_CONFIG(ADDR_SURF_P2) |
3291                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3292                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3293                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3294                                 PIPE_CONFIG(ADDR_SURF_P2) |
3295                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3296                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3297                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3298                                 PIPE_CONFIG(ADDR_SURF_P2) |
3299                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3300                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3301                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3302                                 PIPE_CONFIG(ADDR_SURF_P2) |
3303                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3304                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3305                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3306                                 PIPE_CONFIG(ADDR_SURF_P2) |
3307                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3308                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3309                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3310                                 PIPE_CONFIG(ADDR_SURF_P2) |
3311                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3312                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3313                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3314                                 PIPE_CONFIG(ADDR_SURF_P2));
3315                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3316                                 PIPE_CONFIG(ADDR_SURF_P2) |
3317                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3318                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3320                                  PIPE_CONFIG(ADDR_SURF_P2) |
3321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3323                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3324                                  PIPE_CONFIG(ADDR_SURF_P2) |
3325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3327                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3328                                  PIPE_CONFIG(ADDR_SURF_P2) |
3329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3331                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3332                                  PIPE_CONFIG(ADDR_SURF_P2) |
3333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3335                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3336                                  PIPE_CONFIG(ADDR_SURF_P2) |
3337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3339                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3340                                  PIPE_CONFIG(ADDR_SURF_P2) |
3341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3343                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3344                                  PIPE_CONFIG(ADDR_SURF_P2) |
3345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3348                                  PIPE_CONFIG(ADDR_SURF_P2) |
3349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3351                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3352                                  PIPE_CONFIG(ADDR_SURF_P2) |
3353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3355                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3356                                  PIPE_CONFIG(ADDR_SURF_P2) |
3357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3359                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3360                                  PIPE_CONFIG(ADDR_SURF_P2) |
3361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3363                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3364                                  PIPE_CONFIG(ADDR_SURF_P2) |
3365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3367                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3368                                  PIPE_CONFIG(ADDR_SURF_P2) |
3369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3371                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3372                                  PIPE_CONFIG(ADDR_SURF_P2) |
3373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3375                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3376                                  PIPE_CONFIG(ADDR_SURF_P2) |
3377                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3378                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3379                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3380                                  PIPE_CONFIG(ADDR_SURF_P2) |
3381                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3382                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3383                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3384                                  PIPE_CONFIG(ADDR_SURF_P2) |
3385                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3386                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3387
3388                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3390                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3391                                 NUM_BANKS(ADDR_SURF_8_BANK));
3392                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3394                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3395                                 NUM_BANKS(ADDR_SURF_8_BANK));
3396                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3397                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3398                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3399                                 NUM_BANKS(ADDR_SURF_8_BANK));
3400                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3401                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3402                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3403                                 NUM_BANKS(ADDR_SURF_8_BANK));
3404                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3405                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3406                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3407                                 NUM_BANKS(ADDR_SURF_8_BANK));
3408                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3409                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3410                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3411                                 NUM_BANKS(ADDR_SURF_8_BANK));
3412                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3413                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3414                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3415                                 NUM_BANKS(ADDR_SURF_8_BANK));
3416                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3417                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3418                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3419                                 NUM_BANKS(ADDR_SURF_16_BANK));
3420                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3421                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3422                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3423                                 NUM_BANKS(ADDR_SURF_16_BANK));
3424                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3425                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3426                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3427                                  NUM_BANKS(ADDR_SURF_16_BANK));
3428                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3429                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3430                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3431                                  NUM_BANKS(ADDR_SURF_16_BANK));
3432                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3433                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3434                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3435                                  NUM_BANKS(ADDR_SURF_16_BANK));
3436                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3437                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3438                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3439                                  NUM_BANKS(ADDR_SURF_16_BANK));
3440                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3441                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3442                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3443                                  NUM_BANKS(ADDR_SURF_8_BANK));
3444
3445                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3446                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3447                             reg_offset != 23)
3448                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3449
3450                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3451                         if (reg_offset != 7)
3452                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3453
3454                 break;
3455         }
3456 }
3457
3458 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3459                                   u32 se_num, u32 sh_num, u32 instance)
3460 {
3461         u32 data;
3462
3463         if (instance == 0xffffffff)
3464                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3465         else
3466                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3467
3468         if (se_num == 0xffffffff)
3469                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3470         else
3471                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3472
3473         if (sh_num == 0xffffffff)
3474                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3475         else
3476                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3477
3478         WREG32(mmGRBM_GFX_INDEX, data);
3479 }
3480
3481 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3482                                   u32 me, u32 pipe, u32 q)
3483 {
3484         vi_srbm_select(adev, me, pipe, q, 0);
3485 }
3486
3487 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3488 {
3489         u32 data, mask;
3490
3491         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3492                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3493
3494         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3495
3496         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3497                                          adev->gfx.config.max_sh_per_se);
3498
3499         return (~data) & mask;
3500 }
3501
3502 static void
3503 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3504 {
3505         switch (adev->asic_type) {
3506         case CHIP_FIJI:
3507                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3508                           RB_XSEL2(1) | PKR_MAP(2) |
3509                           PKR_XSEL(1) | PKR_YSEL(1) |
3510                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3511                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3512                            SE_PAIR_YSEL(2);
3513                 break;
3514         case CHIP_TONGA:
3515         case CHIP_POLARIS10:
3516                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3517                           SE_XSEL(1) | SE_YSEL(1);
3518                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3519                            SE_PAIR_YSEL(2);
3520                 break;
3521         case CHIP_TOPAZ:
3522         case CHIP_CARRIZO:
3523                 *rconf |= RB_MAP_PKR0(2);
3524                 *rconf1 |= 0x0;
3525                 break;
3526         case CHIP_POLARIS11:
3527         case CHIP_POLARIS12:
3528                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3529                           SE_XSEL(1) | SE_YSEL(1);
3530                 *rconf1 |= 0x0;
3531                 break;
3532         case CHIP_STONEY:
3533                 *rconf |= 0x0;
3534                 *rconf1 |= 0x0;
3535                 break;
3536         default:
3537                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3538                 break;
3539         }
3540 }
3541
/*
 * Write PA_SC_RASTER_CONFIG and PA_SC_RASTER_CONFIG_1 once per shader engine,
 * overriding the SE_PAIR_MAP, SE_MAP, PKR_MAP and RB_MAP_PKR0/RB_MAP_PKR1
 * fields wherever @rb_mask has no bits set for one half of the corresponding
 * pair.
 *
 * @adev:            amdgpu_device pointer
 * @raster_config:   base PA_SC_RASTER_CONFIG value, adjusted per SE
 * @raster_config_1: base PA_SC_RASTER_CONFIG_1 value
 * @rb_mask:         RB bitmask; the caller in this file passes
 *                   adev->gfx.config.backend_enable_mask
 * @num_rb:          total RB count used to derive the per-SE and per-packer
 *                   widths; the caller in this file passes num_rb_pipes
 *
 * GRBM_GFX_INDEX is switched to each SE in turn and is set back to full
 * broadcast before returning.  The caller visible in this file holds
 * adev->grbm_idx_mutex around the call.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
                                        u32 raster_config, u32 raster_config_1,
                                        unsigned rb_mask, unsigned num_rb)
{
        /* clamp the divisors to at least 1 so the divisions below are safe */
        unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
        unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
        unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
        unsigned rb_per_se = num_rb / num_se;
        unsigned se_mask[4];
        unsigned se;

        /*
         * se_mask[0] is the low rb_per_se bits of rb_mask.  Each following
         * entry is the previous entry shifted up by rb_per_se and masked with
         * rb_mask again.
         * NOTE(review): because each entry is derived from the previous one,
         * a bit cleared in se_mask[n] is also cleared in all later entries -
         * confirm this chaining is intended.
         */
        se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
        se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
        se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
        se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

        /* only these topologies are expected; warn but carry on otherwise */
        WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
        WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
        WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

        /* with more than 2 SEs: one whole SE pair (0/1 or 2/3) has an empty mask */
        if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
                             (!se_mask[2] && !se_mask[3]))) {
                raster_config_1 &= ~SE_PAIR_MAP_MASK;

                if (!se_mask[0] && !se_mask[1]) {
                        raster_config_1 |=
                                SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
                } else {
                        raster_config_1 |=
                                SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
                }
        }

        for (se = 0; se < num_se; se++) {
                /* start every SE from the unmodified base value */
                unsigned raster_config_se = raster_config;
                /* bit ranges of the two packers inside this SE's RB range */
                unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
                unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
                /* index of the even SE of the pair this SE belongs to */
                int idx = (se / 2) * 2;

                /* one SE of the pair has an empty mask: override SE_MAP */
                if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
                        raster_config_se &= ~SE_MAP_MASK;

                        if (!se_mask[idx]) {
                                raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
                        } else {
                                raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
                        }
                }

                pkr0_mask &= rb_mask;
                pkr1_mask &= rb_mask;
                /* one packer of this SE has no bits in rb_mask: override PKR_MAP */
                if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
                        raster_config_se &= ~PKR_MAP_MASK;

                        if (!pkr0_mask) {
                                raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
                        } else {
                                raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
                        }
                }

                if (rb_per_se >= 2) {
                        /* the first two RB bits of this SE (used for RB_MAP_PKR0) */
                        unsigned rb0_mask = 1 << (se * rb_per_se);
                        unsigned rb1_mask = rb0_mask << 1;

                        rb0_mask &= rb_mask;
                        rb1_mask &= rb_mask;
                        if (!rb0_mask || !rb1_mask) {
                                raster_config_se &= ~RB_MAP_PKR0_MASK;

                                if (!rb0_mask) {
                                        raster_config_se |=
                                                RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
                                } else {
                                        raster_config_se |=
                                                RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
                                }
                        }

                        if (rb_per_se > 2) {
                                /* same check for the two RB bits rb_per_pkr further up (RB_MAP_PKR1) */
                                rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
                                rb1_mask = rb0_mask << 1;
                                rb0_mask &= rb_mask;
                                rb1_mask &= rb_mask;
                                if (!rb0_mask || !rb1_mask) {
                                        raster_config_se &= ~RB_MAP_PKR1_MASK;

                                        if (!rb0_mask) {
                                                raster_config_se |=
                                                        RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
                                        } else {
                                                raster_config_se |=
                                                        RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
                                        }
                                }
                        }
                }

                /* GRBM_GFX_INDEX has a different offset on VI */
                /* select this SE only (SH and instance broadcast) before writing */
                gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
                WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
        }

        /* GRBM_GFX_INDEX has a different offset on VI */
        /* back to broadcasting to every SE/SH/instance */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3650
3651 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3652 {
3653         int i, j;
3654         u32 data;
3655         u32 raster_config = 0, raster_config_1 = 0;
3656         u32 active_rbs = 0;
3657         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3658                                         adev->gfx.config.max_sh_per_se;
3659         unsigned num_rb_pipes;
3660
3661         mutex_lock(&adev->grbm_idx_mutex);
3662         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3663                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3664                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3665                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3666                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3667                                                rb_bitmap_width_per_sh);
3668                 }
3669         }
3670         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3671
3672         adev->gfx.config.backend_enable_mask = active_rbs;
3673         adev->gfx.config.num_rbs = hweight32(active_rbs);
3674
3675         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3676                              adev->gfx.config.max_shader_engines, 16);
3677
3678         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3679
3680         if (!adev->gfx.config.backend_enable_mask ||
3681                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3682                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3683                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3684         } else {
3685                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3686                                                         adev->gfx.config.backend_enable_mask,
3687                                                         num_rb_pipes);
3688         }
3689
3690         /* cache the values for userspace */
3691         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3692                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3693                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3694                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3695                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3696                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3697                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3698                         adev->gfx.config.rb_config[i][j].raster_config =
3699                                 RREG32(mmPA_SC_RASTER_CONFIG);
3700                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3701                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3702                 }
3703         }
3704         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3705         mutex_unlock(&adev->grbm_idx_mutex);
3706 }
3707
3708 /**
3709  * gfx_v8_0_init_compute_vmid - gart enable
3710  *
3711  * @adev: amdgpu_device pointer
3712  *
3713  * Initialize compute vmid sh_mem registers
3714  *
3715  */
3716 #define DEFAULT_SH_MEM_BASES    (0x6000)
3717 #define FIRST_COMPUTE_VMID      (8)
3718 #define LAST_COMPUTE_VMID       (16)
3719 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3720 {
3721         int i;
3722         uint32_t sh_mem_config;
3723         uint32_t sh_mem_bases;
3724
3725         /*
3726          * Configure apertures:
3727          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3728          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3729          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3730          */
3731         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3732
3733         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3734                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3735                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3736                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3737                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3738                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3739
3740         mutex_lock(&adev->srbm_mutex);
3741         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3742                 vi_srbm_select(adev, 0, 0, 0, i);
3743                 /* CP and shaders */
3744                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3745                 WREG32(mmSH_MEM_APE1_BASE, 1);
3746                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3747                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3748         }
3749         vi_srbm_select(adev, 0, 0, 0, 0);
3750         mutex_unlock(&adev->srbm_mutex);
3751 }
3752
3753 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3754 {
3755         switch (adev->asic_type) {
3756         default:
3757                 adev->gfx.config.double_offchip_lds_buf = 1;
3758                 break;
3759         case CHIP_CARRIZO:
3760         case CHIP_STONEY:
3761                 adev->gfx.config.double_offchip_lds_buf = 0;
3762                 break;
3763         }
3764 }
3765
3766 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3767 {
3768         u32 tmp, sh_static_mem_cfg;
3769         int i;
3770
3771         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3772         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3773         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3774         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3775
3776         gfx_v8_0_tiling_mode_table_init(adev);
3777         gfx_v8_0_setup_rb(adev);
3778         gfx_v8_0_get_cu_info(adev);
3779         gfx_v8_0_config_init(adev);
3780
3781         /* XXX SH_MEM regs */
3782         /* where to put LDS, scratch, GPUVM in FSA64 space */
3783         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3784                                    SWIZZLE_ENABLE, 1);
3785         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3786                                    ELEMENT_SIZE, 1);
3787         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3788                                    INDEX_STRIDE, 3);
3789         WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3790
3791         mutex_lock(&adev->srbm_mutex);
3792         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3793                 vi_srbm_select(adev, 0, 0, 0, i);
3794                 /* CP and shaders */
3795                 if (i == 0) {
3796                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3797                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3798                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3799                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3800                         WREG32(mmSH_MEM_CONFIG, tmp);
3801                         WREG32(mmSH_MEM_BASES, 0);
3802                 } else {
3803                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3804                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3805                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3806                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3807                         WREG32(mmSH_MEM_CONFIG, tmp);
3808                         tmp = adev->gmc.shared_aperture_start >> 48;
3809                         WREG32(mmSH_MEM_BASES, tmp);
3810                 }
3811
3812                 WREG32(mmSH_MEM_APE1_BASE, 1);
3813                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3814         }
3815         vi_srbm_select(adev, 0, 0, 0, 0);
3816         mutex_unlock(&adev->srbm_mutex);
3817
3818         gfx_v8_0_init_compute_vmid(adev);
3819
3820         mutex_lock(&adev->grbm_idx_mutex);
3821         /*
3822          * making sure that the following register writes will be broadcasted
3823          * to all the shaders
3824          */
3825         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3826
3827         WREG32(mmPA_SC_FIFO_SIZE,
3828                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3829                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3830                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3831                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3832                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3833                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3834                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3835                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3836
3837         tmp = RREG32(mmSPI_ARB_PRIORITY);
3838         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3839         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3840         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3841         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3842         WREG32(mmSPI_ARB_PRIORITY, tmp);
3843
3844         mutex_unlock(&adev->grbm_idx_mutex);
3845
3846 }
3847
3848 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3849 {
3850         u32 i, j, k;
3851         u32 mask;
3852
3853         mutex_lock(&adev->grbm_idx_mutex);
3854         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3855                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3856                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3857                         for (k = 0; k < adev->usec_timeout; k++) {
3858                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3859                                         break;
3860                                 udelay(1);
3861                         }
3862                         if (k == adev->usec_timeout) {
3863                                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3864                                                       0xffffffff, 0xffffffff);
3865                                 mutex_unlock(&adev->grbm_idx_mutex);
3866                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3867                                          i, j);
3868                                 return;
3869                         }
3870                 }
3871         }
3872         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3873         mutex_unlock(&adev->grbm_idx_mutex);
3874
3875         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3876                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3877                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3878                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3879         for (k = 0; k < adev->usec_timeout; k++) {
3880                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3881                         break;
3882                 udelay(1);
3883         }
3884 }
3885
3886 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3887                                                bool enable)
3888 {
3889         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3890
3891         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3892         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3893         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3894         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3895
3896         WREG32(mmCP_INT_CNTL_RING0, tmp);
3897 }
3898
3899 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3900 {
3901         /* csib */
3902         WREG32(mmRLC_CSIB_ADDR_HI,
3903                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3904         WREG32(mmRLC_CSIB_ADDR_LO,
3905                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3906         WREG32(mmRLC_CSIB_LENGTH,
3907                         adev->gfx.rlc.clear_state_size);
3908 }
3909
3910 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3911                                 int ind_offset,
3912                                 int list_size,
3913                                 int *unique_indices,
3914                                 int *indices_count,
3915                                 int max_indices,
3916                                 int *ind_start_offsets,
3917                                 int *offset_count,
3918                                 int max_offset)
3919 {
3920         int indices;
3921         bool new_entry = true;
3922
3923         for (; ind_offset < list_size; ind_offset++) {
3924
3925                 if (new_entry) {
3926                         new_entry = false;
3927                         ind_start_offsets[*offset_count] = ind_offset;
3928                         *offset_count = *offset_count + 1;
3929                         BUG_ON(*offset_count >= max_offset);
3930                 }
3931
3932                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3933                         new_entry = true;
3934                         continue;
3935                 }
3936
3937                 ind_offset += 2;
3938
3939                 /* look for the matching indice */
3940                 for (indices = 0;
3941                         indices < *indices_count;
3942                         indices++) {
3943                         if (unique_indices[indices] ==
3944                                 register_list_format[ind_offset])
3945                                 break;
3946                 }
3947
3948                 if (indices >= *indices_count) {
3949                         unique_indices[*indices_count] =
3950                                 register_list_format[ind_offset];
3951                         indices = *indices_count;
3952                         *indices_count = *indices_count + 1;
3953                         BUG_ON(*indices_count >= max_indices);
3954                 }
3955
3956                 register_list_format[ind_offset] = indices;
3957         }
3958 }
3959
3960 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3961 {
3962         int i, temp, data;
3963         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3964         int indices_count = 0;
3965         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3966         int offset_count = 0;
3967
3968         int list_size;
3969         unsigned int *register_list_format =
3970                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3971         if (!register_list_format)
3972                 return -ENOMEM;
3973         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3974                         adev->gfx.rlc.reg_list_format_size_bytes);
3975
3976         gfx_v8_0_parse_ind_reg_list(register_list_format,
3977                                 RLC_FormatDirectRegListLength,
3978                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3979                                 unique_indices,
3980                                 &indices_count,
3981                                 ARRAY_SIZE(unique_indices),
3982                                 indirect_start_offsets,
3983                                 &offset_count,
3984                                 ARRAY_SIZE(indirect_start_offsets));
3985
3986         /* save and restore list */
3987         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3988
3989         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3990         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3991                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3992
3993         /* indirect list */
3994         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3995         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3996                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3997
3998         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3999         list_size = list_size >> 1;
4000         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4001         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4002
4003         /* starting offsets starts */
4004         WREG32(mmRLC_GPM_SCRATCH_ADDR,
4005                 adev->gfx.rlc.starting_offsets_start);
4006         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4007                 WREG32(mmRLC_GPM_SCRATCH_DATA,
4008                                 indirect_start_offsets[i]);
4009
4010         /* unique indices */
4011         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4012         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4013         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4014                 if (unique_indices[i] != 0) {
4015                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4016                         WREG32(data + i, unique_indices[i] >> 20);
4017                 }
4018         }
4019         kfree(register_list_format);
4020
4021         return 0;
4022 }
4023
/* Set the SRM_ENABLE field of RLC_SRM_CNTL to 1. */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4028
4029 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4030 {
4031         uint32_t data;
4032
4033         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4034
4035         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4036         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4037         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4038         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4039         WREG32(mmRLC_PG_DELAY, data);
4040
4041         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4042         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4043
4044 }
4045
/*
 * Write the SMU_CLK_SLOWDOWN_ON_PU_ENABLE field of RLC_PG_CNTL:
 * 1 when @enable is true, 0 otherwise.
 */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
                                                bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4051
/*
 * Write the SMU_CLK_SLOWDOWN_ON_PD_ENABLE field of RLC_PG_CNTL:
 * 1 when @enable is true, 0 otherwise.
 */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
                                                  bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4057
/*
 * Write the CP_PG_DISABLE field of RLC_PG_CNTL. The field is a disable
 * bit, so the value written is the inverse of @enable.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4062
4063 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4064 {
4065         if ((adev->asic_type == CHIP_CARRIZO) ||
4066             (adev->asic_type == CHIP_STONEY)) {
4067                 gfx_v8_0_init_csb(adev);
4068                 gfx_v8_0_init_save_restore_list(adev);
4069                 gfx_v8_0_enable_save_restore_machine(adev);
4070                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4071                 gfx_v8_0_init_power_gating(adev);
4072                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4073         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4074                    (adev->asic_type == CHIP_POLARIS12)) {
4075                 gfx_v8_0_init_csb(adev);
4076                 gfx_v8_0_init_save_restore_list(adev);
4077                 gfx_v8_0_enable_save_restore_machine(adev);
4078                 gfx_v8_0_init_power_gating(adev);
4079         }
4080
4081 }
4082
/*
 * Stop the RLC: clear the RLC_ENABLE_F32 field of RLC_CNTL, then turn the
 * GUI idle interrupt off and wait via gfx_v8_0_wait_for_rlc_serdes().
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

        gfx_v8_0_enable_gui_idle_interrupt(adev, false);
        gfx_v8_0_wait_for_rlc_serdes(adev);
}
4090
/*
 * Pulse the SOFT_RESET_RLC field of GRBM_SOFT_RESET: set it, wait 50 us,
 * clear it, wait another 50 us.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
        udelay(50);

        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
        udelay(50);
}
4099
/*
 * Start the RLC by setting the RLC_ENABLE_F32 field of RLC_CNTL, then wait
 * 50 us. The GUI idle interrupt is enabled here only when AMD_IS_APU is
 * not set in adev->flags.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

        /* carrizo do enable cp interrupt after cp inited */
        if (!(adev->flags & AMD_IS_APU))
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);

        udelay(50);
}
4110
4111 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4112 {
4113         const struct rlc_firmware_header_v2_0 *hdr;
4114         const __le32 *fw_data;
4115         unsigned i, fw_size;
4116
4117         if (!adev->gfx.rlc_fw)
4118                 return -EINVAL;
4119
4120         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4121         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4122
4123         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4124                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4125         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4126
4127         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4128         for (i = 0; i < fw_size; i++)
4129                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4130         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4131
4132         return 0;
4133 }
4134
4135 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4136 {
4137         int r;
4138         u32 tmp;
4139
4140         gfx_v8_0_rlc_stop(adev);
4141
4142         /* disable CG */
4143         tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4144         tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4145                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4146         WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4147         if (adev->asic_type == CHIP_POLARIS11 ||
4148             adev->asic_type == CHIP_POLARIS10 ||
4149             adev->asic_type == CHIP_POLARIS12) {
4150                 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4151                 tmp &= ~0x3;
4152                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4153         }
4154
4155         /* disable PG */
4156         WREG32(mmRLC_PG_CNTL, 0);
4157
4158         gfx_v8_0_rlc_reset(adev);
4159         gfx_v8_0_init_pg(adev);
4160
4161
4162         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4163                 /* legacy rlc firmware loading */
4164                 r = gfx_v8_0_rlc_load_microcode(adev);
4165                 if (r)
4166                         return r;
4167         }
4168
4169         gfx_v8_0_rlc_start(adev);
4170
4171         return 0;
4172 }
4173
4174 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4175 {
4176         int i;
4177         u32 tmp = RREG32(mmCP_ME_CNTL);
4178
4179         if (enable) {
4180                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4181                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4182                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4183         } else {
4184                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4185                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4186                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4187                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4188                         adev->gfx.gfx_ring[i].ready = false;
4189         }
4190         WREG32(mmCP_ME_CNTL, tmp);
4191         udelay(50);
4192 }
4193
4194 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4195 {
4196         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4197         const struct gfx_firmware_header_v1_0 *ce_hdr;
4198         const struct gfx_firmware_header_v1_0 *me_hdr;
4199         const __le32 *fw_data;
4200         unsigned i, fw_size;
4201
4202         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4203                 return -EINVAL;
4204
4205         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4206                 adev->gfx.pfp_fw->data;
4207         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4208                 adev->gfx.ce_fw->data;
4209         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4210                 adev->gfx.me_fw->data;
4211
4212         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4213         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4214         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4215
4216         gfx_v8_0_cp_gfx_enable(adev, false);
4217
4218         /* PFP */
4219         fw_data = (const __le32 *)
4220                 (adev->gfx.pfp_fw->data +
4221                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4222         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4223         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4224         for (i = 0; i < fw_size; i++)
4225                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4226         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4227
4228         /* CE */
4229         fw_data = (const __le32 *)
4230                 (adev->gfx.ce_fw->data +
4231                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4232         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4233         WREG32(mmCP_CE_UCODE_ADDR, 0);
4234         for (i = 0; i < fw_size; i++)
4235                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4236         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4237
4238         /* ME */
4239         fw_data = (const __le32 *)
4240                 (adev->gfx.me_fw->data +
4241                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4242         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4243         WREG32(mmCP_ME_RAM_WADDR, 0);
4244         for (i = 0; i < fw_size; i++)
4245                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4246         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4247
4248         return 0;
4249 }
4250
4251 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4252 {
4253         u32 count = 0;
4254         const struct cs_section_def *sect = NULL;
4255         const struct cs_extent_def *ext = NULL;
4256
4257         /* begin clear state */
4258         count += 2;
4259         /* context control state */
4260         count += 3;
4261
4262         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4263                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4264                         if (sect->id == SECT_CONTEXT)
4265                                 count += 2 + ext->reg_count;
4266                         else
4267                                 return 0;
4268                 }
4269         }
4270         /* pa_sc_raster_config/pa_sc_raster_config1 */
4271         count += 4;
4272         /* end clear state */
4273         count += 2;
4274         /* clear state */
4275         count += 2;
4276
4277         return count;
4278 }
4279
4280 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4281 {
4282         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4283         const struct cs_section_def *sect = NULL;
4284         const struct cs_extent_def *ext = NULL;
4285         int r, i;
4286
4287         /* init the CP */
4288         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4289         WREG32(mmCP_ENDIAN_SWAP, 0);
4290         WREG32(mmCP_DEVICE_ID, 1);
4291
4292         gfx_v8_0_cp_gfx_enable(adev, true);
4293
4294         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4295         if (r) {
4296                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4297                 return r;
4298         }
4299
4300         /* clear state buffer */
4301         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4302         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4303
4304         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4305         amdgpu_ring_write(ring, 0x80000000);
4306         amdgpu_ring_write(ring, 0x80000000);
4307
4308         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4309                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4310                         if (sect->id == SECT_CONTEXT) {
4311                                 amdgpu_ring_write(ring,
4312                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4313                                                ext->reg_count));
4314                                 amdgpu_ring_write(ring,
4315                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4316                                 for (i = 0; i < ext->reg_count; i++)
4317                                         amdgpu_ring_write(ring, ext->extent[i]);
4318                         }
4319                 }
4320         }
4321
4322         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4323         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4324         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4325         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4326
4327         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4328         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4329
4330         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4331         amdgpu_ring_write(ring, 0);
4332
4333         /* init the CE partitions */
4334         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4335         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4336         amdgpu_ring_write(ring, 0x8000);
4337         amdgpu_ring_write(ring, 0x8000);
4338
4339         amdgpu_ring_commit(ring);
4340
4341         return 0;
4342 }
4343 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4344 {
4345         u32 tmp;
4346         /* no gfx doorbells on iceland */
4347         if (adev->asic_type == CHIP_TOPAZ)
4348                 return;
4349
4350         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4351
4352         if (ring->use_doorbell) {
4353                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4354                                 DOORBELL_OFFSET, ring->doorbell_index);
4355                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4356                                                 DOORBELL_HIT, 0);
4357                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4358                                             DOORBELL_EN, 1);
4359         } else {
4360                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4361         }
4362
4363         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4364
4365         if (adev->flags & AMD_IS_APU)
4366                 return;
4367
4368         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4369                                         DOORBELL_RANGE_LOWER,
4370                                         AMDGPU_DOORBELL_GFX_RING0);
4371         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4372
4373         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4374                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4375 }
4376
4377 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4378 {
4379         struct amdgpu_ring *ring;
4380         u32 tmp;
4381         u32 rb_bufsz;
4382         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4383         int r;
4384
4385         /* Set the write pointer delay */
4386         WREG32(mmCP_RB_WPTR_DELAY, 0);
4387
4388         /* set the RB to use vmid 0 */
4389         WREG32(mmCP_RB_VMID, 0);
4390
4391         /* Set ring buffer size */
4392         ring = &adev->gfx.gfx_ring[0];
4393         rb_bufsz = order_base_2(ring->ring_size / 8);
4394         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4395         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4396         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4397         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4398 #ifdef __BIG_ENDIAN
4399         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4400 #endif
4401         WREG32(mmCP_RB0_CNTL, tmp);
4402
4403         /* Initialize the ring buffer's read and write pointers */
4404         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4405         ring->wptr = 0;
4406         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4407
4408         /* set the wb address wether it's enabled or not */
4409         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4410         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4411         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4412
4413         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4414         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4415         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4416         mdelay(1);
4417         WREG32(mmCP_RB0_CNTL, tmp);
4418
4419         rb_addr = ring->gpu_addr >> 8;
4420         WREG32(mmCP_RB0_BASE, rb_addr);
4421         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4422
4423         gfx_v8_0_set_cpg_door_bell(adev, ring);
4424         /* start the ring */
4425         amdgpu_ring_clear_ring(ring);
4426         gfx_v8_0_cp_gfx_start(adev);
4427         ring->ready = true;
4428         r = amdgpu_ring_test_ring(ring);
4429         if (r)
4430                 ring->ready = false;
4431
4432         return r;
4433 }
4434
4435 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4436 {
4437         int i;
4438
4439         if (enable) {
4440                 WREG32(mmCP_MEC_CNTL, 0);
4441         } else {
4442                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4443                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4444                         adev->gfx.compute_ring[i].ready = false;
4445                 adev->gfx.kiq.ring.ready = false;
4446         }
4447         udelay(50);
4448 }
4449
4450 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4451 {
4452         const struct gfx_firmware_header_v1_0 *mec_hdr;
4453         const __le32 *fw_data;
4454         unsigned i, fw_size;
4455
4456         if (!adev->gfx.mec_fw)
4457                 return -EINVAL;
4458
4459         gfx_v8_0_cp_compute_enable(adev, false);
4460
4461         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4462         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4463
4464         fw_data = (const __le32 *)
4465                 (adev->gfx.mec_fw->data +
4466                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4467         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4468
4469         /* MEC1 */
4470         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4471         for (i = 0; i < fw_size; i++)
4472                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4473         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4474
4475         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4476         if (adev->gfx.mec2_fw) {
4477                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4478
4479                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4480                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4481
4482                 fw_data = (const __le32 *)
4483                         (adev->gfx.mec2_fw->data +
4484                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4485                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4486
4487                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4488                 for (i = 0; i < fw_size; i++)
4489                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4490                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4491         }
4492
4493         return 0;
4494 }
4495
4496 /* KIQ functions */
4497 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4498 {
4499         uint32_t tmp;
4500         struct amdgpu_device *adev = ring->adev;
4501
4502         /* tell RLC which is KIQ queue */
4503         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4504         tmp &= 0xffffff00;
4505         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4506         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4507         tmp |= 0x80;
4508         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4509 }
4510
4511 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4512 {
4513         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4514         uint32_t scratch, tmp = 0;
4515         uint64_t queue_mask = 0;
4516         int r, i;
4517
4518         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4519                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4520                         continue;
4521
4522                 /* This situation may be hit in the future if a new HW
4523                  * generation exposes more than 64 queues. If so, the
4524                  * definition of queue_mask needs updating */
4525                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4526                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4527                         break;
4528                 }
4529
4530                 queue_mask |= (1ull << i);
4531         }
4532
4533         r = amdgpu_gfx_scratch_get(adev, &scratch);
4534         if (r) {
4535                 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4536                 return r;
4537         }
4538         WREG32(scratch, 0xCAFEDEAD);
4539
4540         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4541         if (r) {
4542                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4543                 amdgpu_gfx_scratch_free(adev, scratch);
4544                 return r;
4545         }
4546         /* set resources */
4547         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4548         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4549         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4550         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4551         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4552         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4553         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4554         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4555         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4556                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4557                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4558                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4559
4560                 /* map queues */
4561                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4562                 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4563                 amdgpu_ring_write(kiq_ring,
4564                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4565                 amdgpu_ring_write(kiq_ring,
4566                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4567                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4568                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4569                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4570                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4571                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4572                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4573                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4574         }
4575         /* write to scratch for completion */
4576         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4577         amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4578         amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4579         amdgpu_ring_commit(kiq_ring);
4580
4581         for (i = 0; i < adev->usec_timeout; i++) {
4582                 tmp = RREG32(scratch);
4583                 if (tmp == 0xDEADBEEF)
4584                         break;
4585                 DRM_UDELAY(1);
4586         }
4587         if (i >= adev->usec_timeout) {
4588                 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4589                           scratch, tmp);
4590                 r = -EINVAL;
4591         }
4592         amdgpu_gfx_scratch_free(adev, scratch);
4593
4594         return r;
4595 }
4596
4597 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4598 {
4599         int i, r = 0;
4600
4601         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4602                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4603                 for (i = 0; i < adev->usec_timeout; i++) {
4604                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4605                                 break;
4606                         udelay(1);
4607                 }
4608                 if (i == adev->usec_timeout)
4609                         r = -ETIMEDOUT;
4610         }
4611         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4612         WREG32(mmCP_HQD_PQ_RPTR, 0);
4613         WREG32(mmCP_HQD_PQ_WPTR, 0);
4614
4615         return r;
4616 }
4617
4618 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4619 {
4620         struct amdgpu_device *adev = ring->adev;
4621         struct vi_mqd *mqd = ring->mqd_ptr;
4622         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4623         uint32_t tmp;
4624
4625         mqd->header = 0xC0310800;
4626         mqd->compute_pipelinestat_enable = 0x00000001;
4627         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4628         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4629         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4630         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4631         mqd->compute_misc_reserved = 0x00000003;
4632         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4633                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4634         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4635                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4636         eop_base_addr = ring->eop_gpu_addr >> 8;
4637         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4638         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4639
4640         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4641         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4642         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4643                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4644
4645         mqd->cp_hqd_eop_control = tmp;
4646
4647         /* enable doorbell? */
4648         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4649                             CP_HQD_PQ_DOORBELL_CONTROL,
4650                             DOORBELL_EN,
4651                             ring->use_doorbell ? 1 : 0);
4652
4653         mqd->cp_hqd_pq_doorbell_control = tmp;
4654
4655         /* set the pointer to the MQD */
4656         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4657         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4658
4659         /* set MQD vmid to 0 */
4660         tmp = RREG32(mmCP_MQD_CONTROL);
4661         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4662         mqd->cp_mqd_control = tmp;
4663
4664         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4665         hqd_gpu_addr = ring->gpu_addr >> 8;
4666         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4667         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4668
4669         /* set up the HQD, this is similar to CP_RB0_CNTL */
4670         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4671         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4672                             (order_base_2(ring->ring_size / 4) - 1));
4673         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4674                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4675 #ifdef __BIG_ENDIAN
4676         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4677 #endif
4678         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4679         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4680         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4681         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4682         mqd->cp_hqd_pq_control = tmp;
4683
4684         /* set the wb address whether it's enabled or not */
4685         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4686         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4687         mqd->cp_hqd_pq_rptr_report_addr_hi =
4688                 upper_32_bits(wb_gpu_addr) & 0xffff;
4689
4690         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4691         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4692         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4693         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4694
4695         tmp = 0;
4696         /* enable the doorbell if requested */
4697         if (ring->use_doorbell) {
4698                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4699                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4700                                 DOORBELL_OFFSET, ring->doorbell_index);
4701
4702                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4703                                          DOORBELL_EN, 1);
4704                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4705                                          DOORBELL_SOURCE, 0);
4706                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4707                                          DOORBELL_HIT, 0);
4708         }
4709
4710         mqd->cp_hqd_pq_doorbell_control = tmp;
4711
4712         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4713         ring->wptr = 0;
4714         mqd->cp_hqd_pq_wptr = ring->wptr;
4715         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4716
4717         /* set the vmid for the queue */
4718         mqd->cp_hqd_vmid = 0;
4719
4720         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4721         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4722         mqd->cp_hqd_persistent_state = tmp;
4723
4724         /* set MTYPE */
4725         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4726         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4727         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4728         mqd->cp_hqd_ib_control = tmp;
4729
4730         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4731         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4732         mqd->cp_hqd_iq_timer = tmp;
4733
4734         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4735         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4736         mqd->cp_hqd_ctx_save_control = tmp;
4737
4738         /* defaults */
4739         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4740         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4741         mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4742         mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4743         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4744         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4745         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4746         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4747         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4748         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4749         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4750         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4751         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4752         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4753         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4754
4755         /* activate the queue */
4756         mqd->cp_hqd_active = 1;
4757
4758         return 0;
4759 }
4760
4761 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4762                         struct vi_mqd *mqd)
4763 {
4764         uint32_t mqd_reg;
4765         uint32_t *mqd_data;
4766
4767         /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4768         mqd_data = &mqd->cp_mqd_base_addr_lo;
4769
4770         /* disable wptr polling */
4771         WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4772
4773         /* program all HQD registers */
4774         for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4775                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4776
4777         /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4778          * This is safe since EOP RPTR==WPTR for any inactive HQD
4779          * on ASICs that do not support context-save.
4780          * EOP writes/reads can start anywhere in the ring.
4781          */
4782         if (adev->asic_type != CHIP_TONGA) {
4783                 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4784                 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4785                 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4786         }
4787
4788         for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4789                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4790
4791         /* activate the HQD */
4792         for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4793                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4794
4795         return 0;
4796 }
4797
4798 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4799 {
4800         struct amdgpu_device *adev = ring->adev;
4801         struct vi_mqd *mqd = ring->mqd_ptr;
4802         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4803
4804         gfx_v8_0_kiq_setting(ring);
4805
4806         if (adev->in_gpu_reset) { /* for GPU_RESET case */
4807                 /* reset MQD to a clean status */
4808                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4809                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4810
4811                 /* reset ring buffer */
4812                 ring->wptr = 0;
4813                 amdgpu_ring_clear_ring(ring);
4814                 mutex_lock(&adev->srbm_mutex);
4815                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4816                 gfx_v8_0_mqd_commit(adev, mqd);
4817                 vi_srbm_select(adev, 0, 0, 0, 0);
4818                 mutex_unlock(&adev->srbm_mutex);
4819         } else {
4820                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4821                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4822                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4823                 mutex_lock(&adev->srbm_mutex);
4824                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4825                 gfx_v8_0_mqd_init(ring);
4826                 gfx_v8_0_mqd_commit(adev, mqd);
4827                 vi_srbm_select(adev, 0, 0, 0, 0);
4828                 mutex_unlock(&adev->srbm_mutex);
4829
4830                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4831                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4832         }
4833
4834         return 0;
4835 }
4836
4837 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4838 {
4839         struct amdgpu_device *adev = ring->adev;
4840         struct vi_mqd *mqd = ring->mqd_ptr;
4841         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4842
4843         if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
4844                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4845                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4846                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4847                 mutex_lock(&adev->srbm_mutex);
4848                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4849                 gfx_v8_0_mqd_init(ring);
4850                 vi_srbm_select(adev, 0, 0, 0, 0);
4851                 mutex_unlock(&adev->srbm_mutex);
4852
4853                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4854                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4855         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4856                 /* reset MQD to a clean status */
4857                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4858                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4859                 /* reset ring buffer */
4860                 ring->wptr = 0;
4861                 amdgpu_ring_clear_ring(ring);
4862         } else {
4863                 amdgpu_ring_clear_ring(ring);
4864         }
4865         return 0;
4866 }
4867
4868 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4869 {
4870         if (adev->asic_type > CHIP_TONGA) {
4871                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4872                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4873         }
4874         /* enable doorbells */
4875         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4876 }
4877
4878 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4879 {
4880         struct amdgpu_ring *ring = NULL;
4881         int r = 0, i;
4882
4883         gfx_v8_0_cp_compute_enable(adev, true);
4884
4885         ring = &adev->gfx.kiq.ring;
4886
4887         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4888         if (unlikely(r != 0))
4889                 goto done;
4890
4891         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4892         if (!r) {
4893                 r = gfx_v8_0_kiq_init_queue(ring);
4894                 amdgpu_bo_kunmap(ring->mqd_obj);
4895                 ring->mqd_ptr = NULL;
4896         }
4897         amdgpu_bo_unreserve(ring->mqd_obj);
4898         if (r)
4899                 goto done;
4900
4901         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4902                 ring = &adev->gfx.compute_ring[i];
4903
4904                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4905                 if (unlikely(r != 0))
4906                         goto done;
4907                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4908                 if (!r) {
4909                         r = gfx_v8_0_kcq_init_queue(ring);
4910                         amdgpu_bo_kunmap(ring->mqd_obj);
4911                         ring->mqd_ptr = NULL;
4912                 }
4913                 amdgpu_bo_unreserve(ring->mqd_obj);
4914                 if (r)
4915                         goto done;
4916         }
4917
4918         gfx_v8_0_set_mec_doorbell_range(adev);
4919
4920         r = gfx_v8_0_kiq_kcq_enable(adev);
4921         if (r)
4922                 goto done;
4923
4924         /* Test KIQ */
4925         ring = &adev->gfx.kiq.ring;
4926         ring->ready = true;
4927         r = amdgpu_ring_test_ring(ring);
4928         if (r) {
4929                 ring->ready = false;
4930                 goto done;
4931         }
4932
4933         /* Test KCQs */
4934         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4935                 ring = &adev->gfx.compute_ring[i];
4936                 ring->ready = true;
4937                 r = amdgpu_ring_test_ring(ring);
4938                 if (r)
4939                         ring->ready = false;
4940         }
4941
4942 done:
4943         return r;
4944 }
4945
4946 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4947 {
4948         int r;
4949
4950         if (!(adev->flags & AMD_IS_APU))
4951                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4952
4953         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4954                         /* legacy firmware loading */
4955                 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4956                 if (r)
4957                         return r;
4958
4959                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4960                 if (r)
4961                         return r;
4962         }
4963
4964         r = gfx_v8_0_cp_gfx_resume(adev);
4965         if (r)
4966                 return r;
4967
4968         r = gfx_v8_0_kiq_resume(adev);
4969         if (r)
4970                 return r;
4971
4972         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4973
4974         return 0;
4975 }
4976
/**
 * gfx_v8_0_cp_enable - enable or disable both CP front ends
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable, false to disable
 *
 * Forwards @enable to the GFX CP first and the compute CP second.
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	/* graphics side */
	gfx_v8_0_cp_gfx_enable(adev, enable);

	/* compute side */
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4982
/**
 * gfx_v8_0_hw_init - hardware init callback of the GFX block
 *
 * @handle: amdgpu_device pointer passed as void *
 *
 * Applies the golden register settings, runs the GPU init, then resumes the
 * RLC and, if that succeeded, the CP.
 *
 * Returns 0 on success or the error from the RLC/CP resume.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4999
5000 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
5001 {
5002         struct amdgpu_device *adev = kiq_ring->adev;
5003         uint32_t scratch, tmp = 0;
5004         int r, i;
5005
5006         r = amdgpu_gfx_scratch_get(adev, &scratch);
5007         if (r) {
5008                 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
5009                 return r;
5010         }
5011         WREG32(scratch, 0xCAFEDEAD);
5012
5013         r = amdgpu_ring_alloc(kiq_ring, 10);
5014         if (r) {
5015                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5016                 amdgpu_gfx_scratch_free(adev, scratch);
5017                 return r;
5018         }
5019
5020         /* unmap queues */
5021         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5022         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5023                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5024                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5025                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5026                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5027         amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5028         amdgpu_ring_write(kiq_ring, 0);
5029         amdgpu_ring_write(kiq_ring, 0);
5030         amdgpu_ring_write(kiq_ring, 0);
5031         /* write to scratch for completion */
5032         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5033         amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5034         amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5035         amdgpu_ring_commit(kiq_ring);
5036
5037         for (i = 0; i < adev->usec_timeout; i++) {
5038                 tmp = RREG32(scratch);
5039                 if (tmp == 0xDEADBEEF)
5040                         break;
5041                 DRM_UDELAY(1);
5042         }
5043         if (i >= adev->usec_timeout) {
5044                 DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
5045                 r = -EINVAL;
5046         }
5047         amdgpu_gfx_scratch_free(adev, scratch);
5048         return r;
5049 }
5050
5051 static int gfx_v8_0_hw_fini(void *handle)
5052 {
5053         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5054         int i;
5055
5056         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5057         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5058
5059         /* disable KCQ to avoid CPC touch memory not valid anymore */
5060         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5061                 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
5062
5063         if (amdgpu_sriov_vf(adev)) {
5064                 pr_debug("For SRIOV client, shouldn't do anything.\n");
5065                 return 0;
5066         }
5067         gfx_v8_0_cp_enable(adev, false);
5068         gfx_v8_0_rlc_stop(adev);
5069
5070         amdgpu_device_ip_set_powergating_state(adev,
5071                                                AMD_IP_BLOCK_TYPE_GFX,
5072                                                AMD_PG_STATE_UNGATE);
5073
5074         return 0;
5075 }
5076
5077 static int gfx_v8_0_suspend(void *handle)
5078 {
5079         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5080         adev->gfx.in_suspend = true;
5081         return gfx_v8_0_hw_fini(adev);
5082 }
5083
5084 static int gfx_v8_0_resume(void *handle)
5085 {
5086         int r;
5087         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5088
5089         r = gfx_v8_0_hw_init(adev);
5090         adev->gfx.in_suspend = false;
5091         return r;
5092 }
5093
5094 static bool gfx_v8_0_is_idle(void *handle)
5095 {
5096         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5097
5098         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5099                 return false;
5100         else
5101                 return true;
5102 }
5103
5104 static int gfx_v8_0_wait_for_idle(void *handle)
5105 {
5106         unsigned i;
5107         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5108
5109         for (i = 0; i < adev->usec_timeout; i++) {
5110                 if (gfx_v8_0_is_idle(handle))
5111                         return 0;
5112
5113                 udelay(1);
5114         }
5115         return -ETIMEDOUT;
5116 }
5117
5118 static bool gfx_v8_0_check_soft_reset(void *handle)
5119 {
5120         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5121         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5122         u32 tmp;
5123
5124         /* GRBM_STATUS */
5125         tmp = RREG32(mmGRBM_STATUS);
5126         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5127                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5128                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5129                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5130                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5131                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5132                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5133                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5134                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5135                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5136                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5137                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5138                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5139         }
5140
5141         /* GRBM_STATUS2 */
5142         tmp = RREG32(mmGRBM_STATUS2);
5143         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5144                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5145                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5146
5147         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5148             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5149             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5150                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5151                                                 SOFT_RESET_CPF, 1);
5152                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5153                                                 SOFT_RESET_CPC, 1);
5154                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5155                                                 SOFT_RESET_CPG, 1);
5156                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5157                                                 SOFT_RESET_GRBM, 1);
5158         }
5159
5160         /* SRBM_STATUS */
5161         tmp = RREG32(mmSRBM_STATUS);
5162         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5163                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5164                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5165         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5166                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5167                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5168
5169         if (grbm_soft_reset || srbm_soft_reset) {
5170                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5171                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5172                 return true;
5173         } else {
5174                 adev->gfx.grbm_soft_reset = 0;
5175                 adev->gfx.srbm_soft_reset = 0;
5176                 return false;
5177         }
5178 }
5179
5180 static int gfx_v8_0_pre_soft_reset(void *handle)
5181 {
5182         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5183         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5184
5185         if ((!adev->gfx.grbm_soft_reset) &&
5186             (!adev->gfx.srbm_soft_reset))
5187                 return 0;
5188
5189         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5190         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5191
5192         /* stop the rlc */
5193         gfx_v8_0_rlc_stop(adev);
5194
5195         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5196             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5197                 /* Disable GFX parsing/prefetching */
5198                 gfx_v8_0_cp_gfx_enable(adev, false);
5199
5200         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5201             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5202             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5203             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5204                 int i;
5205
5206                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5207                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5208
5209                         mutex_lock(&adev->srbm_mutex);
5210                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5211                         gfx_v8_0_deactivate_hqd(adev, 2);
5212                         vi_srbm_select(adev, 0, 0, 0, 0);
5213                         mutex_unlock(&adev->srbm_mutex);
5214                 }
5215                 /* Disable MEC parsing/prefetching */
5216                 gfx_v8_0_cp_compute_enable(adev, false);
5217         }
5218
5219        return 0;
5220 }
5221
5222 static int gfx_v8_0_soft_reset(void *handle)
5223 {
5224         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5225         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5226         u32 tmp;
5227
5228         if ((!adev->gfx.grbm_soft_reset) &&
5229             (!adev->gfx.srbm_soft_reset))
5230                 return 0;
5231
5232         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5233         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5234
5235         if (grbm_soft_reset || srbm_soft_reset) {
5236                 tmp = RREG32(mmGMCON_DEBUG);
5237                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5238                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5239                 WREG32(mmGMCON_DEBUG, tmp);
5240                 udelay(50);
5241         }
5242
5243         if (grbm_soft_reset) {
5244                 tmp = RREG32(mmGRBM_SOFT_RESET);
5245                 tmp |= grbm_soft_reset;
5246                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5247                 WREG32(mmGRBM_SOFT_RESET, tmp);
5248                 tmp = RREG32(mmGRBM_SOFT_RESET);
5249
5250                 udelay(50);
5251
5252                 tmp &= ~grbm_soft_reset;
5253                 WREG32(mmGRBM_SOFT_RESET, tmp);
5254                 tmp = RREG32(mmGRBM_SOFT_RESET);
5255         }
5256
5257         if (srbm_soft_reset) {
5258                 tmp = RREG32(mmSRBM_SOFT_RESET);
5259                 tmp |= srbm_soft_reset;
5260                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5261                 WREG32(mmSRBM_SOFT_RESET, tmp);
5262                 tmp = RREG32(mmSRBM_SOFT_RESET);
5263
5264                 udelay(50);
5265
5266                 tmp &= ~srbm_soft_reset;
5267                 WREG32(mmSRBM_SOFT_RESET, tmp);
5268                 tmp = RREG32(mmSRBM_SOFT_RESET);
5269         }
5270
5271         if (grbm_soft_reset || srbm_soft_reset) {
5272                 tmp = RREG32(mmGMCON_DEBUG);
5273                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5274                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5275                 WREG32(mmGMCON_DEBUG, tmp);
5276         }
5277
5278         /* Wait a little for things to settle down */
5279         udelay(50);
5280
5281         return 0;
5282 }
5283
5284 static int gfx_v8_0_post_soft_reset(void *handle)
5285 {
5286         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5287         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5288
5289         if ((!adev->gfx.grbm_soft_reset) &&
5290             (!adev->gfx.srbm_soft_reset))
5291                 return 0;
5292
5293         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5294         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5295
5296         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5297             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5298                 gfx_v8_0_cp_gfx_resume(adev);
5299
5300         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5301             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5302             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5303             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5304                 int i;
5305
5306                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5307                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5308
5309                         mutex_lock(&adev->srbm_mutex);
5310                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5311                         gfx_v8_0_deactivate_hqd(adev, 2);
5312                         vi_srbm_select(adev, 0, 0, 0, 0);
5313                         mutex_unlock(&adev->srbm_mutex);
5314                 }
5315                 gfx_v8_0_kiq_resume(adev);
5316         }
5317         gfx_v8_0_rlc_start(adev);
5318
5319         return 0;
5320 }
5321
5322 /**
5323  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5324  *
5325  * @adev: amdgpu_device pointer
5326  *
5327  * Fetches a GPU clock counter snapshot.
5328  * Returns the 64 bit clock counter snapshot.
5329  */
5330 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5331 {
5332         uint64_t clock;
5333
5334         mutex_lock(&adev->gfx.gpu_clock_mutex);
5335         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5336         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5337                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5338         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5339         return clock;
5340 }
5341
5342 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5343                                           uint32_t vmid,
5344                                           uint32_t gds_base, uint32_t gds_size,
5345                                           uint32_t gws_base, uint32_t gws_size,
5346                                           uint32_t oa_base, uint32_t oa_size)
5347 {
5348         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5349         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5350
5351         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5352         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5353
5354         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5355         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5356
5357         /* GDS Base */
5358         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5359         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5360                                 WRITE_DATA_DST_SEL(0)));
5361         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5362         amdgpu_ring_write(ring, 0);
5363         amdgpu_ring_write(ring, gds_base);
5364
5365         /* GDS Size */
5366         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5367         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5368                                 WRITE_DATA_DST_SEL(0)));
5369         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5370         amdgpu_ring_write(ring, 0);
5371         amdgpu_ring_write(ring, gds_size);
5372
5373         /* GWS */
5374         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5375         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5376                                 WRITE_DATA_DST_SEL(0)));
5377         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5378         amdgpu_ring_write(ring, 0);
5379         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5380
5381         /* OA */
5382         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5383         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5384                                 WRITE_DATA_DST_SEL(0)));
5385         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5386         amdgpu_ring_write(ring, 0);
5387         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5388 }
5389
5390 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5391 {
5392         WREG32(mmSQ_IND_INDEX,
5393                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5394                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5395                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5396                 (SQ_IND_INDEX__FORCE_READ_MASK));
5397         return RREG32(mmSQ_IND_DATA);
5398 }
5399
5400 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5401                            uint32_t wave, uint32_t thread,
5402                            uint32_t regno, uint32_t num, uint32_t *out)
5403 {
5404         WREG32(mmSQ_IND_INDEX,
5405                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5406                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5407                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5408                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5409                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5410                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5411         while (num--)
5412                 *(out++) = RREG32(mmSQ_IND_DATA);
5413 }
5414
5415 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5416 {
5417         /* type 0 wave data */
5418         dst[(*no_fields)++] = 0;
5419         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5420         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5421         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5422         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5423         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5424         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5425         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5426         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5427         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5428         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5429         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5430         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5431         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5432         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5433         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5434         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5435         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5436         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5437 }
5438
5439 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5440                                      uint32_t wave, uint32_t start,
5441                                      uint32_t size, uint32_t *dst)
5442 {
5443         wave_read_regs(
5444                 adev, simd, wave, 0,
5445                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5446 }
5447
5448
5449 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5450         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5451         .select_se_sh = &gfx_v8_0_select_se_sh,
5452         .read_wave_data = &gfx_v8_0_read_wave_data,
5453         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5454         .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5455 };
5456
5457 static int gfx_v8_0_early_init(void *handle)
5458 {
5459         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5460
5461         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5462         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5463         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5464         gfx_v8_0_set_ring_funcs(adev);
5465         gfx_v8_0_set_irq_funcs(adev);
5466         gfx_v8_0_set_gds_init(adev);
5467         gfx_v8_0_set_rlc_funcs(adev);
5468
5469         return 0;
5470 }
5471
5472 static int gfx_v8_0_late_init(void *handle)
5473 {
5474         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5475         int r;
5476
5477         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5478         if (r)
5479                 return r;
5480
5481         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5482         if (r)
5483                 return r;
5484
5485         /* requires IBs so do in late init after IB pool is initialized */
5486         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5487         if (r)
5488                 return r;
5489
5490         amdgpu_device_ip_set_powergating_state(adev,
5491                                                AMD_IP_BLOCK_TYPE_GFX,
5492                                                AMD_PG_STATE_GATE);
5493
5494         return 0;
5495 }
5496
5497 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5498                                                        bool enable)
5499 {
5500         if ((adev->asic_type == CHIP_POLARIS11) ||
5501             (adev->asic_type == CHIP_POLARIS12))
5502                 /* Send msg to SMU via Powerplay */
5503                 amdgpu_device_ip_set_powergating_state(adev,
5504                                                        AMD_IP_BLOCK_TYPE_SMC,
5505                                                        enable ?
5506                                                        AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5507
5508         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5509 }
5510
5511 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5512                                                         bool enable)
5513 {
5514         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5515 }
5516
5517 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5518                 bool enable)
5519 {
5520         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5521 }
5522
5523 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5524                                           bool enable)
5525 {
5526         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5527 }
5528
5529 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5530                                                 bool enable)
5531 {
5532         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5533
5534         /* Read any GFX register to wake up GFX. */
5535         if (!enable)
5536                 RREG32(mmDB_RENDER_CONTROL);
5537 }
5538
5539 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5540                                           bool enable)
5541 {
5542         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5543                 cz_enable_gfx_cg_power_gating(adev, true);
5544                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5545                         cz_enable_gfx_pipeline_power_gating(adev, true);
5546         } else {
5547                 cz_enable_gfx_cg_power_gating(adev, false);
5548                 cz_enable_gfx_pipeline_power_gating(adev, false);
5549         }
5550 }
5551
5552 static int gfx_v8_0_set_powergating_state(void *handle,
5553                                           enum amd_powergating_state state)
5554 {
5555         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5556         bool enable = (state == AMD_PG_STATE_GATE);
5557
5558         if (amdgpu_sriov_vf(adev))
5559                 return 0;
5560
5561         switch (adev->asic_type) {
5562         case CHIP_CARRIZO:
5563         case CHIP_STONEY:
5564
5565                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5566                         cz_enable_sck_slow_down_on_power_up(adev, true);
5567                         cz_enable_sck_slow_down_on_power_down(adev, true);
5568                 } else {
5569                         cz_enable_sck_slow_down_on_power_up(adev, false);
5570                         cz_enable_sck_slow_down_on_power_down(adev, false);
5571                 }
5572                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5573                         cz_enable_cp_power_gating(adev, true);
5574                 else
5575                         cz_enable_cp_power_gating(adev, false);
5576
5577                 cz_update_gfx_cg_power_gating(adev, enable);
5578
5579                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5580                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5581                 else
5582                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5583
5584                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5585                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5586                 else
5587                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5588                 break;
5589         case CHIP_POLARIS11:
5590         case CHIP_POLARIS12:
5591                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5592                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5593                 else
5594                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5595
5596                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5597                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5598                 else
5599                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5600
5601                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5602                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5603                 else
5604                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5605                 break;
5606         default:
5607                 break;
5608         }
5609
5610         return 0;
5611 }
5612
5613 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5614 {
5615         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5616         int data;
5617
5618         if (amdgpu_sriov_vf(adev))
5619                 *flags = 0;
5620
5621         /* AMD_CG_SUPPORT_GFX_MGCG */
5622         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5623         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5624                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5625
5626         /* AMD_CG_SUPPORT_GFX_CGLG */
5627         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5628         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5629                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5630
5631         /* AMD_CG_SUPPORT_GFX_CGLS */
5632         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5633                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5634
5635         /* AMD_CG_SUPPORT_GFX_CGTS */
5636         data = RREG32(mmCGTS_SM_CTRL_REG);
5637         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5638                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5639
5640         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5641         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5642                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5643
5644         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5645         data = RREG32(mmRLC_MEM_SLP_CNTL);
5646         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5647                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5648
5649         /* AMD_CG_SUPPORT_GFX_CP_LS */
5650         data = RREG32(mmCP_MEM_SLP_CNTL);
5651         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5652                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5653 }
5654
5655 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5656                                      uint32_t reg_addr, uint32_t cmd)
5657 {
5658         uint32_t data;
5659
5660         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5661
5662         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5663         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5664
5665         data = RREG32(mmRLC_SERDES_WR_CTRL);
5666         if (adev->asic_type == CHIP_STONEY)
5667                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5668                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5669                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5670                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5671                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5672                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5673                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5674                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5675                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5676         else
5677                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5678                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5679                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5680                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5681                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5682                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5683                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5684                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5685                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5686                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5687                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5688         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5689                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5690                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5691                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5692
5693         WREG32(mmRLC_SERDES_WR_CTRL, data);
5694 }
5695
/*
 * RLC safe-mode message codes and the request/message field layout of
 * RLC_GPR_REG2.
 * NOTE(review): none of these macros is referenced by the safe-mode helpers
 * that follow (they use the RLC_SAFE_MODE fields and a literal message
 * value) — confirm whether another part of the file still needs them.
 */
5696 #define MSG_ENTER_RLC_SAFE_MODE     1
5697 #define MSG_EXIT_RLC_SAFE_MODE      0
5698 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5699 #define RLC_GPR_REG2__REQ__SHIFT 0
5700 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5701 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5702
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode
 * @adev: amdgpu device
 *
 * Returns immediately when the RLC_ENABLE_F32 bit of RLC_CNTL is clear, and
 * does nothing when neither CGCG nor MGCG is set in adev->cg_flags.
 * Otherwise it writes a request (CMD bit set, MESSAGE field = 1) to
 * RLC_SAFE_MODE, polls RLC_GPM_STAT until both the GFX clock and GFX power
 * status bits are set, then polls RLC_SAFE_MODE until the CMD field reads
 * zero.  Each poll is limited to adev->usec_timeout iterations of udelay(1);
 * a timeout is not reported and in_safe_mode is set to true regardless.
 */
5703 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5704 {
5705         u32 data;
5706         unsigned i;
5707
5708         data = RREG32(mmRLC_CNTL);
5709         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5710                 return;
5711
5712         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
                /*
                 * NOTE(review): the value written to RLC_SAFE_MODE is built
                 * on top of the RLC_CNTL value read above, not on a read of
                 * RLC_SAFE_MODE — confirm this is intentional.
                 */
5713                 data |= RLC_SAFE_MODE__CMD_MASK;
5714                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5715                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5716                 WREG32(mmRLC_SAFE_MODE, data);
5717
                /* wait until GFX clock and power status are both reported set */
5718                 for (i = 0; i < adev->usec_timeout; i++) {
5719                         if ((RREG32(mmRLC_GPM_STAT) &
5720                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5721                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5722                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5723                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5724                                 break;
5725                         udelay(1);
5726                 }
5727
                /* wait until the CMD field reads back as zero */
5728                 for (i = 0; i < adev->usec_timeout; i++) {
5729                         if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5730                                 break;
5731                         udelay(1);
5732                 }
5733                 adev->gfx.rlc.in_safe_mode = true;
5734         }
5735 }
5736
/*
 * iceland_exit_rlc_safe_mode - request exit from RLC safe mode
 * @adev: amdgpu device
 *
 * Returns immediately when the RLC_ENABLE_F32 bit of RLC_CNTL is clear.
 * If CGCG or MGCG is set in adev->cg_flags and in_safe_mode is true, writes
 * a request with the CMD bit set and the MESSAGE field cleared to
 * RLC_SAFE_MODE and clears in_safe_mode.  In all remaining cases, including
 * when no request was written, it then polls RLC_SAFE_MODE until the CMD
 * field reads zero, for at most adev->usec_timeout iterations of udelay(1).
 * A timeout is not reported.
 */
5737 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5738 {
5739         u32 data = 0;
5740         unsigned i;
5741
5742         data = RREG32(mmRLC_CNTL);
5743         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5744                 return;
5745
5746         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5747                 if (adev->gfx.rlc.in_safe_mode) {
                        /*
                         * NOTE(review): as in the enter path, the request is
                         * built on the RLC_CNTL value read above — confirm.
                         */
5748                         data |= RLC_SAFE_MODE__CMD_MASK;
5749                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5750                         WREG32(mmRLC_SAFE_MODE, data);
5751                         adev->gfx.rlc.in_safe_mode = false;
5752                 }
5753         }
5754
        /* wait until the CMD field reads back as zero */
5755         for (i = 0; i < adev->usec_timeout; i++) {
5756                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5757                         break;
5758                 udelay(1);
5759         }
5760 }
5761
5762 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5763         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5764         .exit_safe_mode = iceland_exit_rlc_safe_mode
5765 };
5766
/*
 * gfx_v8_0_update_medium_grain_clock_gating - enable or disable MGCG/MGLS/CGTS
 * @adev: amdgpu device
 * @enable: true to enable; honoured only when AMD_CG_SUPPORT_GFX_MGCG is set
 *          in adev->cg_flags, otherwise the disable path is taken
 *
 * The whole sequence runs between the RLC enter_safe_mode and exit_safe_mode
 * callbacks.
 *
 * Enable path: optionally turns on RLC/CP memory light sleep, clears the
 * MGCG override bits in RLC_CGTT_MGCG_OVERRIDE (GRBM is left alone on APUs),
 * sends a "clear MGCG override" SERDES command and, if CGTS is supported,
 * programs CGTS_SM_CTRL_REG.
 *
 * Disable path: sets the override bits, turns off RLC/CP memory light sleep,
 * sets the CGTS overrides and sends a "set MGCG override" SERDES command.
 *
 * Registers are written only when the new value differs from the value read.
 */
5767 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5768                                                       bool enable)
5769 {
5770         uint32_t temp, data;
5771
5772         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5773
5774         /* It is disabled by HW by default */
5775         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5776                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5777                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5778                                 /* 1 - RLC memory Light sleep */
5779                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5780
5781                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
                                /* 2 - CP memory Light sleep */
5782                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5783                 }
5784
5785                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5786                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                /* APUs keep the GRBM override bit as it is */
5787                 if (adev->flags & AMD_IS_APU)
5788                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5789                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5790                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5791                 else
5792                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5793                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5794                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5795                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5796
5797                 if (temp != data)
5798                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5799
5800                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5801                 gfx_v8_0_wait_for_rlc_serdes(adev);
5802
5803                 /* 5 - clear mgcg override */
5804                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5805
5806                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5807                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5808                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5809                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5810                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5811                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5812                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
                        /* the LS override is dropped only if both MGLS and CGTS_LS are supported */
5813                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5814                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5815                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5816                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5817                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5818                         if (temp != data)
5819                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5820                 }
5821                 udelay(50);
5822
5823                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5824                 gfx_v8_0_wait_for_rlc_serdes(adev);
5825         } else {
5826                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5827                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5828                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5829                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5830                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5831                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5832                 if (temp != data)
5833                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5834
5835                 /* 2 - disable MGLS in RLC */
5836                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5837                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5838                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5839                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5840                 }
5841
5842                 /* 3 - disable MGLS in CP */
5843                 data = RREG32(mmCP_MEM_SLP_CNTL);
5844                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5845                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5846                         WREG32(mmCP_MEM_SLP_CNTL, data);
5847                 }
5848
5849                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5850                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5851                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5852                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5853                 if (temp != data)
5854                         WREG32(mmCGTS_SM_CTRL_REG, data);
5855
5856                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5857                 gfx_v8_0_wait_for_rlc_serdes(adev);
5858
5859                 /* 6 - set mgcg override */
5860                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5861
5862                 udelay(50);
5863
5864                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5865                 gfx_v8_0_wait_for_rlc_serdes(adev);
5866         }
5867
5868         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5869 }
5870
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - enable or disable CGCG/CGLS
 * @adev: amdgpu device
 * @enable: true to enable; honoured only when AMD_CG_SUPPORT_GFX_CGCG is set
 *          in adev->cg_flags, otherwise the disable path is taken
 *
 * RLC_CGCG_CGLS_CTRL is sampled before the RLC enter_safe_mode callback is
 * invoked; the rest of the sequence runs in safe mode and ends with the
 * exit_safe_mode callback.
 *
 * Enable path: clears the CGCG override, sends the "clear CGCG override" and
 * "set CGLS" SERDES commands, sets CGCG_EN (and CGLS_EN plus the cleared CGLS
 * override when AMD_CG_SUPPORT_GFX_CGLS is set) and enables the GUI idle
 * interrupt.
 *
 * Disable path: disables the GUI idle interrupt, sets the CGCG and CGLS
 * overrides, reads CB_CGTT_SCLK_CTRL four times, sends the "set CGCG
 * override" and "clear CGLS" SERDES commands, clears CGCG_EN and CGLS_EN and
 * re-enables the GUI idle interrupt.
 *
 * Registers are written only when the new value differs from the value read.
 */
5871 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5872                                                       bool enable)
5873 {
5874         uint32_t temp, temp1, data, data1;
5875
5876         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5877
5878         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5879
5880         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5881                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5882                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5883                 if (temp1 != data1)
5884                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5885
5886                 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5887                 gfx_v8_0_wait_for_rlc_serdes(adev);
5888
5889                 /* 2 - clear cgcg override */
5890                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5891
5892                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5893                 gfx_v8_0_wait_for_rlc_serdes(adev);
5894
5895                 /* 3 - write cmd to set CGLS */
5896                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5897
5898                 /* 4 - enable cgcg */
5899                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5900
5901                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5902                         /* enable cgls */
5903                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5904
5905                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5906                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5907
5908                         if (temp1 != data1)
5909                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5910                 } else {
5911                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5912                 }
5913
5914                 if (temp != data)
5915                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5916
5917                 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5918                  * Cmp_busy/GFX_Idle interrupts
5919                  */
5920                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5921         } else {
5922                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5923                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5924
5925                 /* TEST CGCG */
5926                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5927                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5928                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5929                 if (temp1 != data1)
5930                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5931
5932                 /* read gfx register to wake up cgcg */
5933                 RREG32(mmCB_CGTT_SCLK_CTRL);
5934                 RREG32(mmCB_CGTT_SCLK_CTRL);
5935                 RREG32(mmCB_CGTT_SCLK_CTRL);
5936                 RREG32(mmCB_CGTT_SCLK_CTRL);
5937
5938                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5939                 gfx_v8_0_wait_for_rlc_serdes(adev);
5940
5941                 /* write cmd to Set CGCG Override */
5942                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5943
5944                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5945                 gfx_v8_0_wait_for_rlc_serdes(adev);
5946
5947                 /* write cmd to Clear CGLS */
5948                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5949
5950                 /* disable cgcg, cgls should be disabled too. */
5951                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5952                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5953                 if (temp != data)
5954                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5955                 /* enable interrupts again for PG */
5956                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5957         }
5958
        /* final SERDES idle wait, common to both paths, before leaving safe mode */
5959         gfx_v8_0_wait_for_rlc_serdes(adev);
5960
5961         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5962 }
5963 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5964                                             bool enable)
5965 {
5966         if (enable) {
5967                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5968                  * ===  MGCG + MGLS + TS(CG/LS) ===
5969                  */
5970                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5971                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5972         } else {
5973                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5974                  * ===  CGCG + CGLS ===
5975                  */
5976                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5977                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5978         }
5979         return 0;
5980 }
5981
5982 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5983                                           enum amd_clockgating_state state)
5984 {
5985         uint32_t msg_id, pp_state = 0;
5986         uint32_t pp_support_state = 0;
5987
5988         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5989                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5990                         pp_support_state = PP_STATE_SUPPORT_LS;
5991                         pp_state = PP_STATE_LS;
5992                 }
5993                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5994                         pp_support_state |= PP_STATE_SUPPORT_CG;
5995                         pp_state |= PP_STATE_CG;
5996                 }
5997                 if (state == AMD_CG_STATE_UNGATE)
5998                         pp_state = 0;
5999
6000                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6001                                 PP_BLOCK_GFX_CG,
6002                                 pp_support_state,
6003                                 pp_state);
6004                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6005                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6006         }
6007
6008         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6009                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6010                         pp_support_state = PP_STATE_SUPPORT_LS;
6011                         pp_state = PP_STATE_LS;
6012                 }
6013
6014                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6015                         pp_support_state |= PP_STATE_SUPPORT_CG;
6016                         pp_state |= PP_STATE_CG;
6017                 }
6018
6019                 if (state == AMD_CG_STATE_UNGATE)
6020                         pp_state = 0;
6021
6022                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6023                                 PP_BLOCK_GFX_MG,
6024                                 pp_support_state,
6025                                 pp_state);
6026                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6027                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6028         }
6029
6030         return 0;
6031 }
6032
6033 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6034                                           enum amd_clockgating_state state)
6035 {
6036
6037         uint32_t msg_id, pp_state = 0;
6038         uint32_t pp_support_state = 0;
6039
6040         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6041                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6042                         pp_support_state = PP_STATE_SUPPORT_LS;
6043                         pp_state = PP_STATE_LS;
6044                 }
6045                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6046                         pp_support_state |= PP_STATE_SUPPORT_CG;
6047                         pp_state |= PP_STATE_CG;
6048                 }
6049                 if (state == AMD_CG_STATE_UNGATE)
6050                         pp_state = 0;
6051
6052                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6053                                 PP_BLOCK_GFX_CG,
6054                                 pp_support_state,
6055                                 pp_state);
6056                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6057                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6058         }
6059
6060         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6061                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6062                         pp_support_state = PP_STATE_SUPPORT_LS;
6063                         pp_state = PP_STATE_LS;
6064                 }
6065                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6066                         pp_support_state |= PP_STATE_SUPPORT_CG;
6067                         pp_state |= PP_STATE_CG;
6068                 }
6069                 if (state == AMD_CG_STATE_UNGATE)
6070                         pp_state = 0;
6071
6072                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6073                                 PP_BLOCK_GFX_3D,
6074                                 pp_support_state,
6075                                 pp_state);
6076                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6077                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6078         }
6079
6080         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6081                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6082                         pp_support_state = PP_STATE_SUPPORT_LS;
6083                         pp_state = PP_STATE_LS;
6084                 }
6085
6086                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6087                         pp_support_state |= PP_STATE_SUPPORT_CG;
6088                         pp_state |= PP_STATE_CG;
6089                 }
6090
6091                 if (state == AMD_CG_STATE_UNGATE)
6092                         pp_state = 0;
6093
6094                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6095                                 PP_BLOCK_GFX_MG,
6096                                 pp_support_state,
6097                                 pp_state);
6098                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6099                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6100         }
6101
6102         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6103                 pp_support_state = PP_STATE_SUPPORT_LS;
6104
6105                 if (state == AMD_CG_STATE_UNGATE)
6106                         pp_state = 0;
6107                 else
6108                         pp_state = PP_STATE_LS;
6109
6110                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6111                                 PP_BLOCK_GFX_RLC,
6112                                 pp_support_state,
6113                                 pp_state);
6114                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6115                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6116         }
6117
6118         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6119                 pp_support_state = PP_STATE_SUPPORT_LS;
6120
6121                 if (state == AMD_CG_STATE_UNGATE)
6122                         pp_state = 0;
6123                 else
6124                         pp_state = PP_STATE_LS;
6125                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6126                         PP_BLOCK_GFX_CP,
6127                         pp_support_state,
6128                         pp_state);
6129                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6130                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6131         }
6132
6133         return 0;
6134 }
6135
6136 static int gfx_v8_0_set_clockgating_state(void *handle,
6137                                           enum amd_clockgating_state state)
6138 {
6139         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6140
6141         if (amdgpu_sriov_vf(adev))
6142                 return 0;
6143
6144         switch (adev->asic_type) {
6145         case CHIP_FIJI:
6146         case CHIP_CARRIZO:
6147         case CHIP_STONEY:
6148                 gfx_v8_0_update_gfx_clock_gating(adev,
6149                                                  state == AMD_CG_STATE_GATE);
6150                 break;
6151         case CHIP_TONGA:
6152                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6153                 break;
6154         case CHIP_POLARIS10:
6155         case CHIP_POLARIS11:
6156         case CHIP_POLARIS12:
6157                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6158                 break;
6159         default:
6160                 break;
6161         }
6162         return 0;
6163 }
6164
6165 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6166 {
6167         return ring->adev->wb.wb[ring->rptr_offs];
6168 }
6169
6170 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6171 {
6172         struct amdgpu_device *adev = ring->adev;
6173
6174         if (ring->use_doorbell)
6175                 /* XXX check if swapping is necessary on BE */
6176                 return ring->adev->wb.wb[ring->wptr_offs];
6177         else
6178                 return RREG32(mmCP_RB0_WPTR);
6179 }
6180
6181 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6182 {
6183         struct amdgpu_device *adev = ring->adev;
6184
6185         if (ring->use_doorbell) {
6186                 /* XXX check if swapping is necessary on BE */
6187                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6188                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6189         } else {
6190                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6191                 (void)RREG32(mmCP_RB0_WPTR);
6192         }
6193 }
6194
6195 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6196 {
6197         u32 ref_and_mask, reg_mem_engine;
6198
6199         if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6200             (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6201                 switch (ring->me) {
6202                 case 1:
6203                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6204                         break;
6205                 case 2:
6206                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6207                         break;
6208                 default:
6209                         return;
6210                 }
6211                 reg_mem_engine = 0;
6212         } else {
6213                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6214                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6215         }
6216
6217         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6218         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6219                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6220                                  reg_mem_engine));
6221         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6222         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6223         amdgpu_ring_write(ring, ref_and_mask);
6224         amdgpu_ring_write(ring, ref_and_mask);
6225         amdgpu_ring_write(ring, 0x20); /* poll interval */
6226 }
6227
6228 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6229 {
6230         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6231         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6232                 EVENT_INDEX(4));
6233
6234         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6235         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6236                 EVENT_INDEX(0));
6237 }
6238
6239 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6240                                       struct amdgpu_ib *ib,
6241                                       unsigned vmid, bool ctx_switch)
6242 {
6243         u32 header, control = 0;
6244
6245         if (ib->flags & AMDGPU_IB_FLAG_CE)
6246                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6247         else
6248                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6249
6250         control |= ib->length_dw | (vmid << 24);
6251
6252         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6253                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6254
6255                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6256                         gfx_v8_0_ring_emit_de_meta(ring);
6257         }
6258
6259         amdgpu_ring_write(ring, header);
6260         amdgpu_ring_write(ring,
6261 #ifdef __BIG_ENDIAN
6262                           (2 << 0) |
6263 #endif
6264                           (ib->gpu_addr & 0xFFFFFFFC));
6265         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6266         amdgpu_ring_write(ring, control);
6267 }
6268
6269 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6270                                           struct amdgpu_ib *ib,
6271                                           unsigned vmid, bool ctx_switch)
6272 {
6273         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6274
6275         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6276         amdgpu_ring_write(ring,
6277 #ifdef __BIG_ENDIAN
6278                                 (2 << 0) |
6279 #endif
6280                                 (ib->gpu_addr & 0xFFFFFFFC));
6281         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6282         amdgpu_ring_write(ring, control);
6283 }
6284
6285 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6286                                          u64 seq, unsigned flags)
6287 {
6288         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6289         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6290
6291         /* EVENT_WRITE_EOP - flush caches, send int */
6292         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6293         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6294                                  EOP_TC_ACTION_EN |
6295                                  EOP_TC_WB_ACTION_EN |
6296                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6297                                  EVENT_INDEX(5)));
6298         amdgpu_ring_write(ring, addr & 0xfffffffc);
6299         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6300                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6301         amdgpu_ring_write(ring, lower_32_bits(seq));
6302         amdgpu_ring_write(ring, upper_32_bits(seq));
6303
6304 }
6305
6306 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6307 {
6308         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6309         uint32_t seq = ring->fence_drv.sync_seq;
6310         uint64_t addr = ring->fence_drv.gpu_addr;
6311
6312         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6313         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6314                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6315                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6316         amdgpu_ring_write(ring, addr & 0xfffffffc);
6317         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6318         amdgpu_ring_write(ring, seq);
6319         amdgpu_ring_write(ring, 0xffffffff);
6320         amdgpu_ring_write(ring, 4); /* poll interval */
6321 }
6322
6323 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6324                                         unsigned vmid, uint64_t pd_addr)
6325 {
6326         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6327
6328         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6329
6330         /* wait for the invalidate to complete */
6331         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6332         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6333                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6334                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6335         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6336         amdgpu_ring_write(ring, 0);
6337         amdgpu_ring_write(ring, 0); /* ref */
6338         amdgpu_ring_write(ring, 0); /* mask */
6339         amdgpu_ring_write(ring, 0x20); /* poll interval */
6340
6341         /* compute doesn't have PFP */
6342         if (usepfp) {
6343                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6344                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6345                 amdgpu_ring_write(ring, 0x0);
6346         }
6347 }
6348
6349 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6350 {
6351         return ring->adev->wb.wb[ring->wptr_offs];
6352 }
6353
6354 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6355 {
6356         struct amdgpu_device *adev = ring->adev;
6357
6358         /* XXX check if swapping is necessary on BE */
6359         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6360         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6361 }
6362
6363 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6364                                            bool acquire)
6365 {
6366         struct amdgpu_device *adev = ring->adev;
6367         int pipe_num, tmp, reg;
6368         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6369
6370         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6371
6372         /* first me only has 2 entries, GFX and HP3D */
6373         if (ring->me > 0)
6374                 pipe_num -= 2;
6375
6376         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6377         tmp = RREG32(reg);
6378         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6379         WREG32(reg, tmp);
6380 }
6381
6382 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6383                                             struct amdgpu_ring *ring,
6384                                             bool acquire)
6385 {
6386         int i, pipe;
6387         bool reserve;
6388         struct amdgpu_ring *iring;
6389
6390         mutex_lock(&adev->gfx.pipe_reserve_mutex);
6391         pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6392         if (acquire)
6393                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6394         else
6395                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6396
6397         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6398                 /* Clear all reservations - everyone reacquires all resources */
6399                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6400                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6401                                                        true);
6402
6403                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6404                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6405                                                        true);
6406         } else {
6407                 /* Lower all pipes without a current reservation */
6408                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6409                         iring = &adev->gfx.gfx_ring[i];
6410                         pipe = amdgpu_gfx_queue_to_bit(adev,
6411                                                        iring->me,
6412                                                        iring->pipe,
6413                                                        0);
6414                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6415                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6416                 }
6417
6418                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6419                         iring = &adev->gfx.compute_ring[i];
6420                         pipe = amdgpu_gfx_queue_to_bit(adev,
6421                                                        iring->me,
6422                                                        iring->pipe,
6423                                                        0);
6424                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6425                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6426                 }
6427         }
6428
6429         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6430 }
6431
6432 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6433                                       struct amdgpu_ring *ring,
6434                                       bool acquire)
6435 {
6436         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6437         uint32_t queue_priority = acquire ? 0xf : 0x0;
6438
6439         mutex_lock(&adev->srbm_mutex);
6440         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6441
6442         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6443         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6444
6445         vi_srbm_select(adev, 0, 0, 0, 0);
6446         mutex_unlock(&adev->srbm_mutex);
6447 }
6448 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6449                                                enum drm_sched_priority priority)
6450 {
6451         struct amdgpu_device *adev = ring->adev;
6452         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6453
6454         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6455                 return;
6456
6457         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6458         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6459 }
6460
6461 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6462                                              u64 addr, u64 seq,
6463                                              unsigned flags)
6464 {
6465         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6466         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6467
6468         /* RELEASE_MEM - flush caches, send int */
6469         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6470         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6471                                  EOP_TC_ACTION_EN |
6472                                  EOP_TC_WB_ACTION_EN |
6473                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6474                                  EVENT_INDEX(5)));
6475         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6476         amdgpu_ring_write(ring, addr & 0xfffffffc);
6477         amdgpu_ring_write(ring, upper_32_bits(addr));
6478         amdgpu_ring_write(ring, lower_32_bits(seq));
6479         amdgpu_ring_write(ring, upper_32_bits(seq));
6480 }
6481
6482 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6483                                          u64 seq, unsigned int flags)
6484 {
6485         /* we only allocate 32bit for each seq wb address */
6486         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6487
6488         /* write fence seq to the "addr" */
6489         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6490         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6491                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6492         amdgpu_ring_write(ring, lower_32_bits(addr));
6493         amdgpu_ring_write(ring, upper_32_bits(addr));
6494         amdgpu_ring_write(ring, lower_32_bits(seq));
6495
6496         if (flags & AMDGPU_FENCE_FLAG_INT) {
6497                 /* set register to trigger INT */
6498                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6499                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6500                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6501                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6502                 amdgpu_ring_write(ring, 0);
6503                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6504         }
6505 }
6506
6507 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6508 {
6509         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6510         amdgpu_ring_write(ring, 0);
6511 }
6512
6513 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6514 {
6515         uint32_t dw2 = 0;
6516
6517         if (amdgpu_sriov_vf(ring->adev))
6518                 gfx_v8_0_ring_emit_ce_meta(ring);
6519
6520         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6521         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6522                 gfx_v8_0_ring_emit_vgt_flush(ring);
6523                 /* set load_global_config & load_global_uconfig */
6524                 dw2 |= 0x8001;
6525                 /* set load_cs_sh_regs */
6526                 dw2 |= 0x01000000;
6527                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6528                 dw2 |= 0x10002;
6529
6530                 /* set load_ce_ram if preamble presented */
6531                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6532                         dw2 |= 0x10000000;
6533         } else {
6534                 /* still load_ce_ram if this is the first time preamble presented
6535                  * although there is no context switch happens.
6536                  */
6537                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6538                         dw2 |= 0x10000000;
6539         }
6540
6541         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6542         amdgpu_ring_write(ring, dw2);
6543         amdgpu_ring_write(ring, 0);
6544 }
6545
6546 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6547 {
6548         unsigned ret;
6549
6550         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6551         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6552         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6553         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6554         ret = ring->wptr & ring->buf_mask;
6555         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6556         return ret;
6557 }
6558
6559 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6560 {
6561         unsigned cur;
6562
6563         BUG_ON(offset > ring->buf_mask);
6564         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6565
6566         cur = (ring->wptr & ring->buf_mask) - 1;
6567         if (likely(cur > offset))
6568                 ring->ring[offset] = cur - offset;
6569         else
6570                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6571 }
6572
6573 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6574 {
6575         struct amdgpu_device *adev = ring->adev;
6576
6577         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6578         amdgpu_ring_write(ring, 0 |     /* src: register*/
6579                                 (5 << 8) |      /* dst: memory */
6580                                 (1 << 20));     /* write confirm */
6581         amdgpu_ring_write(ring, reg);
6582         amdgpu_ring_write(ring, 0);
6583         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6584                                 adev->virt.reg_val_offs * 4));
6585         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6586                                 adev->virt.reg_val_offs * 4));
6587 }
6588
6589 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6590                                   uint32_t val)
6591 {
6592         uint32_t cmd;
6593
6594         switch (ring->funcs->type) {
6595         case AMDGPU_RING_TYPE_GFX:
6596                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6597                 break;
6598         case AMDGPU_RING_TYPE_KIQ:
6599                 cmd = 1 << 16; /* no inc addr */
6600                 break;
6601         default:
6602                 cmd = WR_CONFIRM;
6603                 break;
6604         }
6605
6606         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6607         amdgpu_ring_write(ring, cmd);
6608         amdgpu_ring_write(ring, reg);
6609         amdgpu_ring_write(ring, 0);
6610         amdgpu_ring_write(ring, val);
6611 }
6612
6613 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6614                                                  enum amdgpu_interrupt_state state)
6615 {
6616         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6617                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6618 }
6619
6620 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6621                                                      int me, int pipe,
6622                                                      enum amdgpu_interrupt_state state)
6623 {
6624         u32 mec_int_cntl, mec_int_cntl_reg;
6625
6626         /*
6627          * amdgpu controls only the first MEC. That's why this function only
6628          * handles the setting of interrupts for this specific MEC. All other
6629          * pipes' interrupts are set by amdkfd.
6630          */
6631
6632         if (me == 1) {
6633                 switch (pipe) {
6634                 case 0:
6635                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6636                         break;
6637                 case 1:
6638                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6639                         break;
6640                 case 2:
6641                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6642                         break;
6643                 case 3:
6644                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6645                         break;
6646                 default:
6647                         DRM_DEBUG("invalid pipe %d\n", pipe);
6648                         return;
6649                 }
6650         } else {
6651                 DRM_DEBUG("invalid me %d\n", me);
6652                 return;
6653         }
6654
6655         switch (state) {
6656         case AMDGPU_IRQ_STATE_DISABLE:
6657                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6658                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6659                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6660                 break;
6661         case AMDGPU_IRQ_STATE_ENABLE:
6662                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6663                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6664                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6665                 break;
6666         default:
6667                 break;
6668         }
6669 }
6670
6671 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6672                                              struct amdgpu_irq_src *source,
6673                                              unsigned type,
6674                                              enum amdgpu_interrupt_state state)
6675 {
6676         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6677                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6678
6679         return 0;
6680 }
6681
6682 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6683                                               struct amdgpu_irq_src *source,
6684                                               unsigned type,
6685                                               enum amdgpu_interrupt_state state)
6686 {
6687         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6688                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6689
6690         return 0;
6691 }
6692
6693 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6694                                             struct amdgpu_irq_src *src,
6695                                             unsigned type,
6696                                             enum amdgpu_interrupt_state state)
6697 {
6698         switch (type) {
6699         case AMDGPU_CP_IRQ_GFX_EOP:
6700                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6701                 break;
6702         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6703                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6704                 break;
6705         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6706                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6707                 break;
6708         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6709                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6710                 break;
6711         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6712                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6713                 break;
6714         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6715                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6716                 break;
6717         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6718                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6719                 break;
6720         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6721                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6722                 break;
6723         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6724                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6725                 break;
6726         default:
6727                 break;
6728         }
6729         return 0;
6730 }
6731
6732 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6733                             struct amdgpu_irq_src *source,
6734                             struct amdgpu_iv_entry *entry)
6735 {
6736         int i;
6737         u8 me_id, pipe_id, queue_id;
6738         struct amdgpu_ring *ring;
6739
6740         DRM_DEBUG("IH: CP EOP\n");
6741         me_id = (entry->ring_id & 0x0c) >> 2;
6742         pipe_id = (entry->ring_id & 0x03) >> 0;
6743         queue_id = (entry->ring_id & 0x70) >> 4;
6744
6745         switch (me_id) {
6746         case 0:
6747                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6748                 break;
6749         case 1:
6750         case 2:
6751                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6752                         ring = &adev->gfx.compute_ring[i];
6753                         /* Per-queue interrupt is supported for MEC starting from VI.
6754                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6755                           */
6756                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6757                                 amdgpu_fence_process(ring);
6758                 }
6759                 break;
6760         }
6761         return 0;
6762 }
6763
6764 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6765                                  struct amdgpu_irq_src *source,
6766                                  struct amdgpu_iv_entry *entry)
6767 {
6768         DRM_ERROR("Illegal register access in command stream\n");
6769         schedule_work(&adev->reset_work);
6770         return 0;
6771 }
6772
6773 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6774                                   struct amdgpu_irq_src *source,
6775                                   struct amdgpu_iv_entry *entry)
6776 {
6777         DRM_ERROR("Illegal instruction in command stream\n");
6778         schedule_work(&adev->reset_work);
6779         return 0;
6780 }
6781
6782 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6783                                             struct amdgpu_irq_src *src,
6784                                             unsigned int type,
6785                                             enum amdgpu_interrupt_state state)
6786 {
6787         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6788
6789         switch (type) {
6790         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6791                 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6792                              state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6793                 if (ring->me == 1)
6794                         WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6795                                      ring->pipe,
6796                                      GENERIC2_INT_ENABLE,
6797                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6798                 else
6799                         WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6800                                      ring->pipe,
6801                                      GENERIC2_INT_ENABLE,
6802                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6803                 break;
6804         default:
6805                 BUG(); /* kiq only support GENERIC2_INT now */
6806                 break;
6807         }
6808         return 0;
6809 }
6810
6811 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6812                             struct amdgpu_irq_src *source,
6813                             struct amdgpu_iv_entry *entry)
6814 {
6815         u8 me_id, pipe_id, queue_id;
6816         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6817
6818         me_id = (entry->ring_id & 0x0c) >> 2;
6819         pipe_id = (entry->ring_id & 0x03) >> 0;
6820         queue_id = (entry->ring_id & 0x70) >> 4;
6821         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6822                    me_id, pipe_id, queue_id);
6823
6824         amdgpu_fence_process(ring);
6825         return 0;
6826 }
6827
/*
 * IP-block callback table for GFX v8.0: lifecycle (init/fini,
 * suspend/resume), idle and soft-reset handling, and clock/power gating
 * control.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6848
/*
 * Ring callback table for the GFX ring.
 *
 * emit_frame_size is a sum of per-packet dword budgets; each term is
 * labelled with the packet sequence it accounts for.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH: TLB flush writes + 7dw wait + 2dw PFP sync */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jumps to the place just
		       prior to this double SWITCH_BUFFER */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6892
6893 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6894         .type = AMDGPU_RING_TYPE_COMPUTE,
6895         .align_mask = 0xff,
6896         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6897         .support_64bit_ptrs = false,
6898         .get_rptr = gfx_v8_0_ring_get_rptr,
6899         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6900         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6901         .emit_frame_size =
6902                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6903                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6904                 5 + /* hdp_invalidate */
6905                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6906                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6907                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6908         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6909         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6910         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6911         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6912         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6913         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6914         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6915         .test_ring = gfx_v8_0_ring_test_ring,
6916         .test_ib = gfx_v8_0_ring_test_ib,
6917         .insert_nop = amdgpu_ring_insert_nop,
6918         .pad_ib = amdgpu_ring_generic_pad_ib,
6919         .set_priority = gfx_v8_0_ring_set_priority_compute,
6920         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6921 };
6922
6923 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6924         .type = AMDGPU_RING_TYPE_KIQ,
6925         .align_mask = 0xff,
6926         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6927         .support_64bit_ptrs = false,
6928         .get_rptr = gfx_v8_0_ring_get_rptr,
6929         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6930         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6931         .emit_frame_size =
6932                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6933                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6934                 5 + /* hdp_invalidate */
6935                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6936                 17 + /* gfx_v8_0_ring_emit_vm_flush */
6937                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6938         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6939         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6940         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6941         .test_ring = gfx_v8_0_ring_test_ring,
6942         .test_ib = gfx_v8_0_ring_test_ib,
6943         .insert_nop = amdgpu_ring_insert_nop,
6944         .pad_ib = amdgpu_ring_generic_pad_ib,
6945         .emit_rreg = gfx_v8_0_ring_emit_rreg,
6946         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6947 };
6948
6949 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6950 {
6951         int i;
6952
6953         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6954
6955         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6956                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6957
6958         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6959                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6960 }
6961
6962 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6963         .set = gfx_v8_0_set_eop_interrupt_state,
6964         .process = gfx_v8_0_eop_irq,
6965 };
6966
6967 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6968         .set = gfx_v8_0_set_priv_reg_fault_state,
6969         .process = gfx_v8_0_priv_reg_irq,
6970 };
6971
6972 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6973         .set = gfx_v8_0_set_priv_inst_fault_state,
6974         .process = gfx_v8_0_priv_inst_irq,
6975 };
6976
6977 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
6978         .set = gfx_v8_0_kiq_set_interrupt_state,
6979         .process = gfx_v8_0_kiq_irq,
6980 };
6981
6982 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6983 {
6984         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6985         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6986
6987         adev->gfx.priv_reg_irq.num_types = 1;
6988         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6989
6990         adev->gfx.priv_inst_irq.num_types = 1;
6991         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6992
6993         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
6994         adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
6995 }
6996
6997 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6998 {
6999         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7000 }
7001
7002 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7003 {
7004         /* init asci gds info */
7005         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7006         adev->gds.gws.total_size = 64;
7007         adev->gds.oa.total_size = 16;
7008
7009         if (adev->gds.mem.total_size == 64 * 1024) {
7010                 adev->gds.mem.gfx_partition_size = 4096;
7011                 adev->gds.mem.cs_partition_size = 4096;
7012
7013                 adev->gds.gws.gfx_partition_size = 4;
7014                 adev->gds.gws.cs_partition_size = 4;
7015
7016                 adev->gds.oa.gfx_partition_size = 4;
7017                 adev->gds.oa.cs_partition_size = 1;
7018         } else {
7019                 adev->gds.mem.gfx_partition_size = 1024;
7020                 adev->gds.mem.cs_partition_size = 1024;
7021
7022                 adev->gds.gws.gfx_partition_size = 16;
7023                 adev->gds.gws.cs_partition_size = 16;
7024
7025                 adev->gds.oa.gfx_partition_size = 4;
7026                 adev->gds.oa.cs_partition_size = 4;
7027         }
7028 }
7029
7030 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7031                                                  u32 bitmap)
7032 {
7033         u32 data;
7034
7035         if (!bitmap)
7036                 return;
7037
7038         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7039         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7040
7041         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7042 }
7043
7044 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7045 {
7046         u32 data, mask;
7047
7048         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7049                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7050
7051         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7052
7053         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7054 }
7055
7056 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7057 {
7058         int i, j, k, counter, active_cu_number = 0;
7059         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7060         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7061         unsigned disable_masks[4 * 2];
7062         u32 ao_cu_num;
7063
7064         memset(cu_info, 0, sizeof(*cu_info));
7065
7066         if (adev->flags & AMD_IS_APU)
7067                 ao_cu_num = 2;
7068         else
7069                 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7070
7071         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7072
7073         mutex_lock(&adev->grbm_idx_mutex);
7074         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7075                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7076                         mask = 1;
7077                         ao_bitmap = 0;
7078                         counter = 0;
7079                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7080                         if (i < 4 && j < 2)
7081                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7082                                         adev, disable_masks[i * 2 + j]);
7083                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7084                         cu_info->bitmap[i][j] = bitmap;
7085
7086                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7087                                 if (bitmap & mask) {
7088                                         if (counter < ao_cu_num)
7089                                                 ao_bitmap |= mask;
7090                                         counter ++;
7091                                 }
7092                                 mask <<= 1;
7093                         }
7094                         active_cu_number += counter;
7095                         if (i < 2 && j < 2)
7096                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7097                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7098                 }
7099         }
7100         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7101         mutex_unlock(&adev->grbm_idx_mutex);
7102
7103         cu_info->number = active_cu_number;
7104         cu_info->ao_cu_mask = ao_cu_mask;
7105         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7106         cu_info->max_waves_per_simd = 10;
7107         cu_info->max_scratch_slots_per_cu = 32;
7108         cu_info->wave_front_size = 64;
7109         cu_info->lds_size = 64;
7110 }
7111
7112 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7113 {
7114         .type = AMD_IP_BLOCK_TYPE_GFX,
7115         .major = 8,
7116         .minor = 0,
7117         .rev = 0,
7118         .funcs = &gfx_v8_0_ip_funcs,
7119 };
7120
7121 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7122 {
7123         .type = AMD_IP_BLOCK_TYPE_GFX,
7124         .major = 8,
7125         .minor = 1,
7126         .rev = 0,
7127         .funcs = &gfx_v8_0_ip_funcs,
7128 };
7129
7130 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7131 {
7132         uint64_t ce_payload_addr;
7133         int cnt_ce;
7134         union {
7135                 struct vi_ce_ib_state regular;
7136                 struct vi_ce_ib_state_chained_ib chained;
7137         } ce_payload = {};
7138
7139         if (ring->adev->virt.chained_ib_support) {
7140                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7141                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7142                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7143         } else {
7144                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7145                         offsetof(struct vi_gfx_meta_data, ce_payload);
7146                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7147         }
7148
7149         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7150         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7151                                 WRITE_DATA_DST_SEL(8) |
7152                                 WR_CONFIRM) |
7153                                 WRITE_DATA_CACHE_POLICY(0));
7154         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7155         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7156         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7157 }
7158
7159 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7160 {
7161         uint64_t de_payload_addr, gds_addr, csa_addr;
7162         int cnt_de;
7163         union {
7164                 struct vi_de_ib_state regular;
7165                 struct vi_de_ib_state_chained_ib chained;
7166         } de_payload = {};
7167
7168         csa_addr = amdgpu_csa_vaddr(ring->adev);
7169         gds_addr = csa_addr + 4096;
7170         if (ring->adev->virt.chained_ib_support) {
7171                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7172                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7173                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7174                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7175         } else {
7176                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7177                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7178                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7179                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7180         }
7181
7182         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7183         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7184                                 WRITE_DATA_DST_SEL(8) |
7185                                 WR_CONFIRM) |
7186                                 WRITE_DATA_CACHE_POLICY(0));
7187         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7188         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7189         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7190 }
This page took 0.478564 seconds and 4 git commands to generate.