/*
 * Source: linux.git — drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
 * Snapshot at commit: "drm/amdgpu: added AMD GPU instance counting V2"
 */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"
55
/* One GFX ring on GFX v8; 2 KiB per-queue MEC HPD buffer
 * (NOTE(review): HPD = per-queue persistent state area used by the MEC —
 * size assumed from usage elsewhere in the driver; confirm). */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 2048

/* Per-ASIC "golden" GB_ADDR_CONFIG values applied during gfx init. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Field builders for GB_TILE_MODE* / GB_MACROTILE_MODE* register values:
 * shift a raw field value into its register position. */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* RLC_CGTT_MGCG_OVERRIDE per-block override bit masks, defined locally
 * (not provided by the sh_mask headers included above). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set (1) or clear (0) the addressed BPM register. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* Number of entries in the RLC firmware's direct register list format
 * (NOTE(review): value dictated by the RLC firmware format; confirm). */
#define RLC_FormatDirectRegListLength        14
96
/*
 * Firmware blobs required by each GFX v8 ASIC (CE/PFP/ME front-end
 * microcode, MEC compute microcode, RLC power/clock-gating microcode).
 * MODULE_FIRMWARE() records the dependency in module metadata so
 * userspace (e.g. initramfs tooling) can bundle the files.
 * The "_2" variants are alternate builds loaded for some Polaris SKUs.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
172
/*
 * Per-VMID GDS register offsets: one row per VMID (0..15), giving the
 * GDS base/size and the GWS and OA registers for that VMID.  Indexed by
 * VMID when programming a VM's GDS allocation.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
192
/*
 * Tonga A11 golden register settings.
 * Flat list of {register, AND mask, OR value} triples — NOTE(review):
 * triple layout assumed from the amdgpu golden-init convention
 * (amdgpu_device_program_register_sequence); confirm against the caller.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
212
/*
 * Tonga common golden settings: raster config, address config and SPI
 * CU resource reservation.  {register, AND mask, OR value} triples;
 * mmGRBM_GFX_INDEX = 0xe0000000 first so the writes broadcast to all
 * SEs/SHs/instances (NOTE(review): broadcast encoding assumed; confirm).
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
224
/*
 * Tonga MGCG/CGCG (medium-grain / coarse-grain clock gating) init
 * sequence.  {register, AND mask, OR value} triples: per-block CGTT
 * clock-control registers, then per-CU (CU0..CU7) CGTS gating delays,
 * then the global CGTS/CGCG enables.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
303
/*
 * VegaM A11 golden register settings.
 * {register, AND mask, OR value} triples (amdgpu golden-init format).
 */
static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
324
/*
 * VegaM common golden settings (broadcast GRBM index, address config,
 * SPI CU reservation).  {register, AND mask, OR value} triples.
 */
static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
334
/*
 * Polaris11 A11 golden register settings.
 * {register, AND mask, OR value} triples (amdgpu golden-init format).
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
355
/*
 * Polaris11 common golden settings (broadcast GRBM index, address
 * config, SPI CU reservation).  {register, AND mask, OR value} triples.
 */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
365
/*
 * Polaris10 A11 golden register settings.
 * {register, AND mask, OR value} triples (amdgpu golden-init format).
 */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
386
/*
 * Polaris10 common golden settings (broadcast GRBM index, raster and
 * address config, SPI CU reservation).  {register, mask, value} triples.
 */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
398
/*
 * Fiji common golden settings (broadcast GRBM index, raster and address
 * config, SPI CU reservation).  {register, AND mask, OR value} triples.
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
412
/*
 * Fiji A10 golden register settings.
 * {register, AND mask, OR value} triples (amdgpu golden-init format).
 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
427
/*
 * Fiji MGCG/CGCG clock-gating init sequence.  {register, AND mask,
 * OR value} triples: per-block CGTT clock controls followed by the
 * global CGTS/CGCG enables (no per-CU CGTS rows on Fiji, unlike Tonga).
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
466
/*
 * Iceland (Topaz) A11 golden register settings.
 * {register, AND mask, OR value} triples (amdgpu golden-init format).
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
486
/*
 * Iceland common golden settings (broadcast GRBM index, raster and
 * address config, SPI CU reservation).  {register, mask, value} triples.
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
498
/*
 * Iceland MGCG/CGCG clock-gating init sequence.  {register, AND mask,
 * OR value} triples: per-block CGTT clock controls, then per-CU
 * (CU0..CU5) CGTS gating rows, then global CGTS/CGCG enables.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
566
/*
 * Carrizo A11 golden register settings.
 * {register, AND mask, OR value} triples (amdgpu golden-init format).
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
582
/*
 * Carrizo common golden settings (broadcast GRBM index, raster and
 * address config, SPI CU reservation).  {register, mask, value} triples.
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
594
595 static const u32 cz_mgcg_cgcg_init[] =
596 {
597         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
598         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
599         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
600         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
601         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
602         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
606         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
608         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
615         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
616         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
617         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
619         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
620         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
621         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
622         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
623         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
624         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
625         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
626         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
627         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
628         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
629         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
630         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
631         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
632         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
633         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
634         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
635         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
636         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
637         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
638         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
639         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
640         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
641         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
642         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
643         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
644         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
645         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
646         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
647         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
648         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
649         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
650         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
651         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
652         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
653         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
654         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
655         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
656         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
657         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
658         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
659         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
660         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
661         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
662         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
663         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
664         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
665         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
666         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
667         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
668         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
669         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
670         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
671         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
672 };
673
/*
 * Stoney golden register settings: {register, AND-mask, OR-value} triples
 * applied once at init via amdgpu_device_program_register_sequence().
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
687
/*
 * Stoney common golden registers: broadcast GRBM index, raster config and
 * SPI CU resource reservations ({register, mask, value} triples).
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
699
/*
 * Stoney medium-grain / coarse-grain clockgating init sequence
 * ({register, mask, value} triples).
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
708
709
/*
 * Human-readable decode of the SQ EDC error source field, indexed by the
 * hardware-reported source value, for ECC error logging.
 */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
719
720 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
721 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
722 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
723 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
724 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
725 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
726 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
727 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
728
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 *
 * Applies the clockgating init, golden settings and common golden register
 * sequences for the detected VI-family ASIC.  The sequences are programmed
 * in the order listed; that order is part of the hardware bring-up contract,
 * so do not reorder the calls within a case.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/*
		 * Board-specific quirk: certain Polaris10 boards (matched by
		 * PCI revision + subsystem IDs) need extra I2C writes via the
		 * atombios I2C channel during golden-register init.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
825
826 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
827 {
828         adev->gfx.scratch.num_reg = 8;
829         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
830         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
831 }
832
833 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
834 {
835         struct amdgpu_device *adev = ring->adev;
836         uint32_t scratch;
837         uint32_t tmp = 0;
838         unsigned i;
839         int r;
840
841         r = amdgpu_gfx_scratch_get(adev, &scratch);
842         if (r) {
843                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
844                 return r;
845         }
846         WREG32(scratch, 0xCAFEDEAD);
847         r = amdgpu_ring_alloc(ring, 3);
848         if (r) {
849                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
850                           ring->idx, r);
851                 amdgpu_gfx_scratch_free(adev, scratch);
852                 return r;
853         }
854         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
855         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
856         amdgpu_ring_write(ring, 0xDEADBEEF);
857         amdgpu_ring_commit(ring);
858
859         for (i = 0; i < adev->usec_timeout; i++) {
860                 tmp = RREG32(scratch);
861                 if (tmp == 0xDEADBEEF)
862                         break;
863                 DRM_UDELAY(1);
864         }
865         if (i < adev->usec_timeout) {
866                 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
867                          ring->idx, i);
868         } else {
869                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
870                           ring->idx, scratch, tmp);
871                 r = -EINVAL;
872         }
873         amdgpu_gfx_scratch_free(adev, scratch);
874         return r;
875 }
876
877 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
878 {
879         struct amdgpu_device *adev = ring->adev;
880         struct amdgpu_ib ib;
881         struct dma_fence *f = NULL;
882
883         unsigned int index;
884         uint64_t gpu_addr;
885         uint32_t tmp;
886         long r;
887
888         r = amdgpu_device_wb_get(adev, &index);
889         if (r) {
890                 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
891                 return r;
892         }
893
894         gpu_addr = adev->wb.gpu_addr + (index * 4);
895         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
896         memset(&ib, 0, sizeof(ib));
897         r = amdgpu_ib_get(adev, NULL, 16, &ib);
898         if (r) {
899                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
900                 goto err1;
901         }
902         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
903         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
904         ib.ptr[2] = lower_32_bits(gpu_addr);
905         ib.ptr[3] = upper_32_bits(gpu_addr);
906         ib.ptr[4] = 0xDEADBEEF;
907         ib.length_dw = 5;
908
909         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
910         if (r)
911                 goto err2;
912
913         r = dma_fence_wait_timeout(f, false, timeout);
914         if (r == 0) {
915                 DRM_ERROR("amdgpu: IB test timed out.\n");
916                 r = -ETIMEDOUT;
917                 goto err2;
918         } else if (r < 0) {
919                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
920                 goto err2;
921         }
922
923         tmp = adev->wb.wb[index];
924         if (tmp == 0xDEADBEEF) {
925                 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
926                 r = 0;
927         } else {
928                 DRM_ERROR("ib test on ring %d failed\n", ring->idx);
929                 r = -EINVAL;
930         }
931
932 err2:
933         amdgpu_ib_free(adev, &ib, NULL);
934         dma_fence_put(f);
935 err1:
936         amdgpu_device_wb_free(adev, index);
937         return r;
938 }
939
940
941 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
942 {
943         release_firmware(adev->gfx.pfp_fw);
944         adev->gfx.pfp_fw = NULL;
945         release_firmware(adev->gfx.me_fw);
946         adev->gfx.me_fw = NULL;
947         release_firmware(adev->gfx.ce_fw);
948         adev->gfx.ce_fw = NULL;
949         release_firmware(adev->gfx.rlc_fw);
950         adev->gfx.rlc_fw = NULL;
951         release_firmware(adev->gfx.mec_fw);
952         adev->gfx.mec_fw = NULL;
953         if ((adev->asic_type != CHIP_STONEY) &&
954             (adev->asic_type != CHIP_TOPAZ))
955                 release_firmware(adev->gfx.mec2_fw);
956         adev->gfx.mec2_fw = NULL;
957
958         kfree(adev->gfx.rlc.register_list_format);
959 }
960
961 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
962 {
963         const char *chip_name;
964         char fw_name[30];
965         int err;
966         struct amdgpu_firmware_info *info = NULL;
967         const struct common_firmware_header *header = NULL;
968         const struct gfx_firmware_header_v1_0 *cp_hdr;
969         const struct rlc_firmware_header_v2_0 *rlc_hdr;
970         unsigned int *tmp = NULL, i;
971
972         DRM_DEBUG("\n");
973
974         switch (adev->asic_type) {
975         case CHIP_TOPAZ:
976                 chip_name = "topaz";
977                 break;
978         case CHIP_TONGA:
979                 chip_name = "tonga";
980                 break;
981         case CHIP_CARRIZO:
982                 chip_name = "carrizo";
983                 break;
984         case CHIP_FIJI:
985                 chip_name = "fiji";
986                 break;
987         case CHIP_STONEY:
988                 chip_name = "stoney";
989                 break;
990         case CHIP_POLARIS10:
991                 chip_name = "polaris10";
992                 break;
993         case CHIP_POLARIS11:
994                 chip_name = "polaris11";
995                 break;
996         case CHIP_POLARIS12:
997                 chip_name = "polaris12";
998                 break;
999         case CHIP_VEGAM:
1000                 chip_name = "vegam";
1001                 break;
1002         default:
1003                 BUG();
1004         }
1005
1006         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1007                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
1008                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1009                 if (err == -ENOENT) {
1010                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1011                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1012                 }
1013         } else {
1014                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1015                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1016         }
1017         if (err)
1018                 goto out;
1019         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1020         if (err)
1021                 goto out;
1022         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1023         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1024         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1025
1026         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1027                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1028                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1029                 if (err == -ENOENT) {
1030                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1031                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1032                 }
1033         } else {
1034                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1035                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1036         }
1037         if (err)
1038                 goto out;
1039         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1040         if (err)
1041                 goto out;
1042         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1043         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1044
1045         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1046
1047         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1048                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1049                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1050                 if (err == -ENOENT) {
1051                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1052                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1053                 }
1054         } else {
1055                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1056                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1057         }
1058         if (err)
1059                 goto out;
1060         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1061         if (err)
1062                 goto out;
1063         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1064         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1065         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1066
1067         /*
1068          * Support for MCBP/Virtualization in combination with chained IBs is
1069          * formal released on feature version #46
1070          */
1071         if (adev->gfx.ce_feature_version >= 46 &&
1072             adev->gfx.pfp_feature_version >= 46) {
1073                 adev->virt.chained_ib_support = true;
1074                 DRM_INFO("Chained IB support enabled!\n");
1075         } else
1076                 adev->virt.chained_ib_support = false;
1077
1078         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1079         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1080         if (err)
1081                 goto out;
1082         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1083         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1084         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1085         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1086
1087         adev->gfx.rlc.save_and_restore_offset =
1088                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1089         adev->gfx.rlc.clear_state_descriptor_offset =
1090                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1091         adev->gfx.rlc.avail_scratch_ram_locations =
1092                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1093         adev->gfx.rlc.reg_restore_list_size =
1094                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1095         adev->gfx.rlc.reg_list_format_start =
1096                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1097         adev->gfx.rlc.reg_list_format_separate_start =
1098                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1099         adev->gfx.rlc.starting_offsets_start =
1100                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1101         adev->gfx.rlc.reg_list_format_size_bytes =
1102                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1103         adev->gfx.rlc.reg_list_size_bytes =
1104                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1105
1106         adev->gfx.rlc.register_list_format =
1107                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1108                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1109
1110         if (!adev->gfx.rlc.register_list_format) {
1111                 err = -ENOMEM;
1112                 goto out;
1113         }
1114
1115         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1116                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1117         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1118                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1119
1120         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1121
1122         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1123                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1124         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1125                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1126
1127         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1128                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1129                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1130                 if (err == -ENOENT) {
1131                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1132                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1133                 }
1134         } else {
1135                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1136                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1137         }
1138         if (err)
1139                 goto out;
1140         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1141         if (err)
1142                 goto out;
1143         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1144         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1145         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1146
1147         if ((adev->asic_type != CHIP_STONEY) &&
1148             (adev->asic_type != CHIP_TOPAZ)) {
1149                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1150                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1151                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1152                         if (err == -ENOENT) {
1153                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1154                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1155                         }
1156                 } else {
1157                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1158                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1159                 }
1160                 if (!err) {
1161                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1162                         if (err)
1163                                 goto out;
1164                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1165                                 adev->gfx.mec2_fw->data;
1166                         adev->gfx.mec2_fw_version =
1167                                 le32_to_cpu(cp_hdr->header.ucode_version);
1168                         adev->gfx.mec2_feature_version =
1169                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1170                 } else {
1171                         err = 0;
1172                         adev->gfx.mec2_fw = NULL;
1173                 }
1174         }
1175
1176         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1177                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1178                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1179                 info->fw = adev->gfx.pfp_fw;
1180                 header = (const struct common_firmware_header *)info->fw->data;
1181                 adev->firmware.fw_size +=
1182                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1183
1184                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1185                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1186                 info->fw = adev->gfx.me_fw;
1187                 header = (const struct common_firmware_header *)info->fw->data;
1188                 adev->firmware.fw_size +=
1189                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1190
1191                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1192                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1193                 info->fw = adev->gfx.ce_fw;
1194                 header = (const struct common_firmware_header *)info->fw->data;
1195                 adev->firmware.fw_size +=
1196                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1197
1198                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1199                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1200                 info->fw = adev->gfx.rlc_fw;
1201                 header = (const struct common_firmware_header *)info->fw->data;
1202                 adev->firmware.fw_size +=
1203                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1204
1205                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1206                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1207                 info->fw = adev->gfx.mec_fw;
1208                 header = (const struct common_firmware_header *)info->fw->data;
1209                 adev->firmware.fw_size +=
1210                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1211
1212                 /* we need account JT in */
1213                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1214                 adev->firmware.fw_size +=
1215                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1216
1217                 if (amdgpu_sriov_vf(adev)) {
1218                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1219                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1220                         info->fw = adev->gfx.mec_fw;
1221                         adev->firmware.fw_size +=
1222                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1223                 }
1224
1225                 if (adev->gfx.mec2_fw) {
1226                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1227                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1228                         info->fw = adev->gfx.mec2_fw;
1229                         header = (const struct common_firmware_header *)info->fw->data;
1230                         adev->firmware.fw_size +=
1231                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1232                 }
1233
1234         }
1235
1236 out:
1237         if (err) {
1238                 dev_err(adev->dev,
1239                         "gfx8: Failed to load firmware \"%s\"\n",
1240                         fw_name);
1241                 release_firmware(adev->gfx.pfp_fw);
1242                 adev->gfx.pfp_fw = NULL;
1243                 release_firmware(adev->gfx.me_fw);
1244                 adev->gfx.me_fw = NULL;
1245                 release_firmware(adev->gfx.ce_fw);
1246                 adev->gfx.ce_fw = NULL;
1247                 release_firmware(adev->gfx.rlc_fw);
1248                 adev->gfx.rlc_fw = NULL;
1249                 release_firmware(adev->gfx.mec_fw);
1250                 adev->gfx.mec_fw = NULL;
1251                 release_firmware(adev->gfx.mec2_fw);
1252                 adev->gfx.mec2_fw = NULL;
1253         }
1254         return err;
1255 }
1256
1257 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1258                                     volatile u32 *buffer)
1259 {
1260         u32 count = 0, i;
1261         const struct cs_section_def *sect = NULL;
1262         const struct cs_extent_def *ext = NULL;
1263
1264         if (adev->gfx.rlc.cs_data == NULL)
1265                 return;
1266         if (buffer == NULL)
1267                 return;
1268
1269         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1270         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1271
1272         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1273         buffer[count++] = cpu_to_le32(0x80000000);
1274         buffer[count++] = cpu_to_le32(0x80000000);
1275
1276         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1277                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1278                         if (sect->id == SECT_CONTEXT) {
1279                                 buffer[count++] =
1280                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1281                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1282                                                 PACKET3_SET_CONTEXT_REG_START);
1283                                 for (i = 0; i < ext->reg_count; i++)
1284                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1285                         } else {
1286                                 return;
1287                         }
1288                 }
1289         }
1290
1291         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1292         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1293                         PACKET3_SET_CONTEXT_REG_START);
1294         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1295         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1296
1297         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1298         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1299
1300         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1301         buffer[count++] = cpu_to_le32(0);
1302 }
1303
1304 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1305 {
1306         const __le32 *fw_data;
1307         volatile u32 *dst_ptr;
1308         int me, i, max_me = 4;
1309         u32 bo_offset = 0;
1310         u32 table_offset, table_size;
1311
1312         if (adev->asic_type == CHIP_CARRIZO)
1313                 max_me = 5;
1314
1315         /* write the cp table buffer */
1316         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1317         for (me = 0; me < max_me; me++) {
1318                 if (me == 0) {
1319                         const struct gfx_firmware_header_v1_0 *hdr =
1320                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1321                         fw_data = (const __le32 *)
1322                                 (adev->gfx.ce_fw->data +
1323                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1324                         table_offset = le32_to_cpu(hdr->jt_offset);
1325                         table_size = le32_to_cpu(hdr->jt_size);
1326                 } else if (me == 1) {
1327                         const struct gfx_firmware_header_v1_0 *hdr =
1328                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1329                         fw_data = (const __le32 *)
1330                                 (adev->gfx.pfp_fw->data +
1331                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1332                         table_offset = le32_to_cpu(hdr->jt_offset);
1333                         table_size = le32_to_cpu(hdr->jt_size);
1334                 } else if (me == 2) {
1335                         const struct gfx_firmware_header_v1_0 *hdr =
1336                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1337                         fw_data = (const __le32 *)
1338                                 (adev->gfx.me_fw->data +
1339                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1340                         table_offset = le32_to_cpu(hdr->jt_offset);
1341                         table_size = le32_to_cpu(hdr->jt_size);
1342                 } else if (me == 3) {
1343                         const struct gfx_firmware_header_v1_0 *hdr =
1344                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1345                         fw_data = (const __le32 *)
1346                                 (adev->gfx.mec_fw->data +
1347                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1348                         table_offset = le32_to_cpu(hdr->jt_offset);
1349                         table_size = le32_to_cpu(hdr->jt_size);
1350                 } else  if (me == 4) {
1351                         const struct gfx_firmware_header_v1_0 *hdr =
1352                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1353                         fw_data = (const __le32 *)
1354                                 (adev->gfx.mec2_fw->data +
1355                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1356                         table_offset = le32_to_cpu(hdr->jt_offset);
1357                         table_size = le32_to_cpu(hdr->jt_size);
1358                 }
1359
1360                 for (i = 0; i < table_size; i ++) {
1361                         dst_ptr[bo_offset + i] =
1362                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1363                 }
1364
1365                 bo_offset += table_size;
1366         }
1367 }
1368
/*
 * gfx_v8_0_rlc_fini - free the RLC buffer objects
 * @adev: amdgpu device pointer
 *
 * Releases the clear state BO and the CP jump table BO created by
 * gfx_v8_0_rlc_init().  amdgpu_bo_free_kernel() tolerates NULL/unset
 * objects, so this is safe to call on partially initialized state.
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1374
1375 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1376 {
1377         volatile u32 *dst_ptr;
1378         u32 dws;
1379         const struct cs_section_def *cs_data;
1380         int r;
1381
1382         adev->gfx.rlc.cs_data = vi_cs_data;
1383
1384         cs_data = adev->gfx.rlc.cs_data;
1385
1386         if (cs_data) {
1387                 /* clear state block */
1388                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1389
1390                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1391                                               AMDGPU_GEM_DOMAIN_VRAM,
1392                                               &adev->gfx.rlc.clear_state_obj,
1393                                               &adev->gfx.rlc.clear_state_gpu_addr,
1394                                               (void **)&adev->gfx.rlc.cs_ptr);
1395                 if (r) {
1396                         dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1397                         gfx_v8_0_rlc_fini(adev);
1398                         return r;
1399                 }
1400
1401                 /* set up the cs buffer */
1402                 dst_ptr = adev->gfx.rlc.cs_ptr;
1403                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1404                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1405                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1406         }
1407
1408         if ((adev->asic_type == CHIP_CARRIZO) ||
1409             (adev->asic_type == CHIP_STONEY)) {
1410                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1411                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1412                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1413                                               &adev->gfx.rlc.cp_table_obj,
1414                                               &adev->gfx.rlc.cp_table_gpu_addr,
1415                                               (void **)&adev->gfx.rlc.cp_table_ptr);
1416                 if (r) {
1417                         dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1418                         return r;
1419                 }
1420
1421                 cz_init_cp_jump_table(adev);
1422
1423                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1424                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1425         }
1426
1427         return 0;
1428 }
1429
/*
 * gfx_v8_0_mec_fini - free the MEC HPD EOP buffer object
 * @adev: amdgpu device pointer
 *
 * Counterpart to gfx_v8_0_mec_init(); safe to call if the BO was
 * never created.
 */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1434
1435 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1436 {
1437         int r;
1438         u32 *hpd;
1439         size_t mec_hpd_size;
1440
1441         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1442
1443         /* take ownership of the relevant compute queues */
1444         amdgpu_gfx_compute_queue_acquire(adev);
1445
1446         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1447
1448         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1449                                       AMDGPU_GEM_DOMAIN_GTT,
1450                                       &adev->gfx.mec.hpd_eop_obj,
1451                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1452                                       (void **)&hpd);
1453         if (r) {
1454                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1455                 return r;
1456         }
1457
1458         memset(hpd, 0, mec_hpd_size);
1459
1460         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1461         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1462
1463         return 0;
1464 }
1465
/*
 * Pre-assembled GCN compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize the VGPRs before the
 * EDC counters are armed.  Raw machine code; do not edit by hand.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1502
/*
 * Pre-assembled GCN compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize the SGPRs (run twice,
 * once per SGPR dispatch).  Raw machine code; do not edit by hand.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1527
/*
 * Register/value pairs programmed via SET_SH_REG before the VGPR init
 * dispatch in gfx_v8_0_do_edc_gpr_workarounds(); consumed two entries
 * at a time (register offset, value).
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1548
/*
 * Register/value pairs for the first SGPR init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (SE0 thread mask 0x0f selects the
 * lower CUs); consumed two entries at a time (register offset, value).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1569
/*
 * Register/value pairs for the second SGPR init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (SE0 thread mask 0xf0 selects the
 * upper CUs); consumed two entries at a time (register offset, value).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1590
/*
 * EDC SEC/DED counter registers read back (and thereby cleared) at the
 * end of gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1619
/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPRs before enabling EDC
 * @adev: amdgpu device pointer
 *
 * Carrizo-only workaround: with EDC disabled, runs three compute
 * dispatches (one VGPR init, two SGPR inits) that write every GPR so no
 * stale parity data triggers errors, then enables DED/propagate-FED
 * mode and reads back the SEC/DED counter registers to clear them.
 *
 * Returns 0 on success (or when skipped), a negative error code on
 * IB submission/fence failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save GB_EDC_MODE and disable EDC while the init shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/*
	 * IB size per dispatch: 3 dwords per (reg, value) pair, + 4 for
	 * the PGM_LO/HI write, + 5 for DISPATCH_DIRECT, + 2 for the CS
	 * partial flush EVENT_WRITE, times 4 bytes per dword.
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders after the packet area of the IB */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED mode and FED propagation */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1782
1783 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1784 {
1785         u32 gb_addr_config;
1786         u32 mc_shared_chmap, mc_arb_ramcfg;
1787         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1788         u32 tmp;
1789         int ret;
1790
1791         switch (adev->asic_type) {
1792         case CHIP_TOPAZ:
1793                 adev->gfx.config.max_shader_engines = 1;
1794                 adev->gfx.config.max_tile_pipes = 2;
1795                 adev->gfx.config.max_cu_per_sh = 6;
1796                 adev->gfx.config.max_sh_per_se = 1;
1797                 adev->gfx.config.max_backends_per_se = 2;
1798                 adev->gfx.config.max_texture_channel_caches = 2;
1799                 adev->gfx.config.max_gprs = 256;
1800                 adev->gfx.config.max_gs_threads = 32;
1801                 adev->gfx.config.max_hw_contexts = 8;
1802
1803                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1804                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1805                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1806                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1807                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1808                 break;
1809         case CHIP_FIJI:
1810                 adev->gfx.config.max_shader_engines = 4;
1811                 adev->gfx.config.max_tile_pipes = 16;
1812                 adev->gfx.config.max_cu_per_sh = 16;
1813                 adev->gfx.config.max_sh_per_se = 1;
1814                 adev->gfx.config.max_backends_per_se = 4;
1815                 adev->gfx.config.max_texture_channel_caches = 16;
1816                 adev->gfx.config.max_gprs = 256;
1817                 adev->gfx.config.max_gs_threads = 32;
1818                 adev->gfx.config.max_hw_contexts = 8;
1819
1820                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1821                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1822                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1823                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1824                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1825                 break;
1826         case CHIP_POLARIS11:
1827         case CHIP_POLARIS12:
1828                 ret = amdgpu_atombios_get_gfx_info(adev);
1829                 if (ret)
1830                         return ret;
1831                 adev->gfx.config.max_gprs = 256;
1832                 adev->gfx.config.max_gs_threads = 32;
1833                 adev->gfx.config.max_hw_contexts = 8;
1834
1835                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1836                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1837                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1838                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1839                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1840                 break;
1841         case CHIP_POLARIS10:
1842         case CHIP_VEGAM:
1843                 ret = amdgpu_atombios_get_gfx_info(adev);
1844                 if (ret)
1845                         return ret;
1846                 adev->gfx.config.max_gprs = 256;
1847                 adev->gfx.config.max_gs_threads = 32;
1848                 adev->gfx.config.max_hw_contexts = 8;
1849
1850                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1851                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1852                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1853                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1854                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1855                 break;
1856         case CHIP_TONGA:
1857                 adev->gfx.config.max_shader_engines = 4;
1858                 adev->gfx.config.max_tile_pipes = 8;
1859                 adev->gfx.config.max_cu_per_sh = 8;
1860                 adev->gfx.config.max_sh_per_se = 1;
1861                 adev->gfx.config.max_backends_per_se = 2;
1862                 adev->gfx.config.max_texture_channel_caches = 8;
1863                 adev->gfx.config.max_gprs = 256;
1864                 adev->gfx.config.max_gs_threads = 32;
1865                 adev->gfx.config.max_hw_contexts = 8;
1866
1867                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1868                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1869                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1870                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1871                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1872                 break;
1873         case CHIP_CARRIZO:
1874                 adev->gfx.config.max_shader_engines = 1;
1875                 adev->gfx.config.max_tile_pipes = 2;
1876                 adev->gfx.config.max_sh_per_se = 1;
1877                 adev->gfx.config.max_backends_per_se = 2;
1878                 adev->gfx.config.max_cu_per_sh = 8;
1879                 adev->gfx.config.max_texture_channel_caches = 2;
1880                 adev->gfx.config.max_gprs = 256;
1881                 adev->gfx.config.max_gs_threads = 32;
1882                 adev->gfx.config.max_hw_contexts = 8;
1883
1884                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1885                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1886                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1887                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1888                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1889                 break;
1890         case CHIP_STONEY:
1891                 adev->gfx.config.max_shader_engines = 1;
1892                 adev->gfx.config.max_tile_pipes = 2;
1893                 adev->gfx.config.max_sh_per_se = 1;
1894                 adev->gfx.config.max_backends_per_se = 1;
1895                 adev->gfx.config.max_cu_per_sh = 3;
1896                 adev->gfx.config.max_texture_channel_caches = 2;
1897                 adev->gfx.config.max_gprs = 256;
1898                 adev->gfx.config.max_gs_threads = 16;
1899                 adev->gfx.config.max_hw_contexts = 8;
1900
1901                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1902                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1903                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1904                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1905                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1906                 break;
1907         default:
1908                 adev->gfx.config.max_shader_engines = 2;
1909                 adev->gfx.config.max_tile_pipes = 4;
1910                 adev->gfx.config.max_cu_per_sh = 2;
1911                 adev->gfx.config.max_sh_per_se = 1;
1912                 adev->gfx.config.max_backends_per_se = 2;
1913                 adev->gfx.config.max_texture_channel_caches = 4;
1914                 adev->gfx.config.max_gprs = 256;
1915                 adev->gfx.config.max_gs_threads = 32;
1916                 adev->gfx.config.max_hw_contexts = 8;
1917
1918                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1919                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1920                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1921                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1922                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1923                 break;
1924         }
1925
1926         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1927         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1928         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1929
1930         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1931         adev->gfx.config.mem_max_burst_length_bytes = 256;
1932         if (adev->flags & AMD_IS_APU) {
1933                 /* Get memory bank mapping mode. */
1934                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1935                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1936                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1937
1938                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1939                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1940                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1941
1942                 /* Validate settings in case only one DIMM installed. */
1943                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1944                         dimm00_addr_map = 0;
1945                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1946                         dimm01_addr_map = 0;
1947                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1948                         dimm10_addr_map = 0;
1949                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1950                         dimm11_addr_map = 0;
1951
1952                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1953                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1954                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1955                         adev->gfx.config.mem_row_size_in_kb = 2;
1956                 else
1957                         adev->gfx.config.mem_row_size_in_kb = 1;
1958         } else {
1959                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1960                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1961                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1962                         adev->gfx.config.mem_row_size_in_kb = 4;
1963         }
1964
1965         adev->gfx.config.shader_engine_tile_size = 32;
1966         adev->gfx.config.num_gpus = 1;
1967         adev->gfx.config.multi_gpu_tile_size = 64;
1968
1969         /* fix up row size */
1970         switch (adev->gfx.config.mem_row_size_in_kb) {
1971         case 1:
1972         default:
1973                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1974                 break;
1975         case 2:
1976                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1977                 break;
1978         case 4:
1979                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1980                 break;
1981         }
1982         adev->gfx.config.gb_addr_config = gb_addr_config;
1983
1984         return 0;
1985 }
1986
1987 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1988                                         int mec, int pipe, int queue)
1989 {
1990         int r;
1991         unsigned irq_type;
1992         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1993
1994         ring = &adev->gfx.compute_ring[ring_id];
1995
1996         /* mec0 is me1 */
1997         ring->me = mec + 1;
1998         ring->pipe = pipe;
1999         ring->queue = queue;
2000
2001         ring->ring_obj = NULL;
2002         ring->use_doorbell = true;
2003         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
2004         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2005                                 + (ring_id * GFX8_MEC_HPD_SIZE);
2006         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2007
2008         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2009                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2010                 + ring->pipe;
2011
2012         /* type-2 packets are deprecated on MEC, use type-3 instead */
2013         r = amdgpu_ring_init(adev, ring, 1024,
2014                         &adev->gfx.eop_irq, irq_type);
2015         if (r)
2016                 return r;
2017
2018
2019         return 0;
2020 }
2021
2022 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
2023
/**
 * gfx_v8_0_sw_init - software-side init for the GFX v8 IP block
 * @handle: opaque pointer, actually the struct amdgpu_device
 *
 * Sets the MEC/pipe/queue topology for the ASIC, registers the interrupt
 * sources the block uses, loads microcode, allocates the RLC/MEC/KIQ/MQD
 * backing objects and creates the gfx and compute rings.
 *
 * Returns 0 on success or a negative error code on the first failing step.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs depends on the ASIC generation */
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq  */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	/* SQ interrupt payloads are handled in process context */
	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			/* iterate pipes in the inner loop so consecutive
			 * ring_ids land on different pipes */
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2172
2173 static int gfx_v8_0_sw_fini(void *handle)
2174 {
2175         int i;
2176         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2177
2178         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2179         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2180         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2181
2182         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2183                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2184         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2185                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2186
2187         amdgpu_gfx_compute_mqd_sw_fini(adev);
2188         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2189         amdgpu_gfx_kiq_fini(adev);
2190
2191         gfx_v8_0_mec_fini(adev);
2192         gfx_v8_0_rlc_fini(adev);
2193         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2194                                 &adev->gfx.rlc.clear_state_gpu_addr,
2195                                 (void **)&adev->gfx.rlc.cs_ptr);
2196         if ((adev->asic_type == CHIP_CARRIZO) ||
2197             (adev->asic_type == CHIP_STONEY)) {
2198                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2199                                 &adev->gfx.rlc.cp_table_gpu_addr,
2200                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2201         }
2202         gfx_v8_0_free_microcode(adev);
2203
2204         return 0;
2205 }
2206
2207 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2208 {
2209         uint32_t *modearray, *mod2array;
2210         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2211         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2212         u32 reg_offset;
2213
2214         modearray = adev->gfx.config.tile_mode_array;
2215         mod2array = adev->gfx.config.macrotile_mode_array;
2216
2217         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2218                 modearray[reg_offset] = 0;
2219
2220         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2221                 mod2array[reg_offset] = 0;
2222
2223         switch (adev->asic_type) {
2224         case CHIP_TOPAZ:
2225                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2226                                 PIPE_CONFIG(ADDR_SURF_P2) |
2227                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2228                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2229                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2230                                 PIPE_CONFIG(ADDR_SURF_P2) |
2231                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2232                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2233                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2234                                 PIPE_CONFIG(ADDR_SURF_P2) |
2235                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2236                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2237                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2238                                 PIPE_CONFIG(ADDR_SURF_P2) |
2239                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2240                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2241                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2242                                 PIPE_CONFIG(ADDR_SURF_P2) |
2243                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2244                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2245                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2246                                 PIPE_CONFIG(ADDR_SURF_P2) |
2247                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2248                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2249                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2250                                 PIPE_CONFIG(ADDR_SURF_P2) |
2251                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2252                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2253                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2254                                 PIPE_CONFIG(ADDR_SURF_P2));
2255                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2256                                 PIPE_CONFIG(ADDR_SURF_P2) |
2257                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2258                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2259                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2260                                  PIPE_CONFIG(ADDR_SURF_P2) |
2261                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2262                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2263                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2264                                  PIPE_CONFIG(ADDR_SURF_P2) |
2265                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2266                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2267                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2268                                  PIPE_CONFIG(ADDR_SURF_P2) |
2269                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2270                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2271                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2272                                  PIPE_CONFIG(ADDR_SURF_P2) |
2273                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2274                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2275                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2276                                  PIPE_CONFIG(ADDR_SURF_P2) |
2277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2279                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2280                                  PIPE_CONFIG(ADDR_SURF_P2) |
2281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2283                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2284                                  PIPE_CONFIG(ADDR_SURF_P2) |
2285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2287                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2288                                  PIPE_CONFIG(ADDR_SURF_P2) |
2289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2291                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2292                                  PIPE_CONFIG(ADDR_SURF_P2) |
2293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2295                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2296                                  PIPE_CONFIG(ADDR_SURF_P2) |
2297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2299                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2300                                  PIPE_CONFIG(ADDR_SURF_P2) |
2301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2303                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2304                                  PIPE_CONFIG(ADDR_SURF_P2) |
2305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2307                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2308                                  PIPE_CONFIG(ADDR_SURF_P2) |
2309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2311                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2312                                  PIPE_CONFIG(ADDR_SURF_P2) |
2313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2315                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2316                                  PIPE_CONFIG(ADDR_SURF_P2) |
2317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2319                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320                                  PIPE_CONFIG(ADDR_SURF_P2) |
2321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2323                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2324                                  PIPE_CONFIG(ADDR_SURF_P2) |
2325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2327
2328                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2329                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2330                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2331                                 NUM_BANKS(ADDR_SURF_8_BANK));
2332                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2333                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2334                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2335                                 NUM_BANKS(ADDR_SURF_8_BANK));
2336                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2337                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2338                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2339                                 NUM_BANKS(ADDR_SURF_8_BANK));
2340                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2341                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2342                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2343                                 NUM_BANKS(ADDR_SURF_8_BANK));
2344                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2346                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2347                                 NUM_BANKS(ADDR_SURF_8_BANK));
2348                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2349                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2350                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2351                                 NUM_BANKS(ADDR_SURF_8_BANK));
2352                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2353                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2354                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2355                                 NUM_BANKS(ADDR_SURF_8_BANK));
2356                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2357                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2358                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2359                                 NUM_BANKS(ADDR_SURF_16_BANK));
2360                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2361                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2362                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2363                                 NUM_BANKS(ADDR_SURF_16_BANK));
2364                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2365                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2366                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2367                                  NUM_BANKS(ADDR_SURF_16_BANK));
2368                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2369                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2370                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2371                                  NUM_BANKS(ADDR_SURF_16_BANK));
2372                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2374                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2375                                  NUM_BANKS(ADDR_SURF_16_BANK));
2376                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2378                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2379                                  NUM_BANKS(ADDR_SURF_16_BANK));
2380                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2382                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2383                                  NUM_BANKS(ADDR_SURF_8_BANK));
2384
2385                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2386                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2387                             reg_offset != 23)
2388                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2389
2390                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2391                         if (reg_offset != 7)
2392                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2393
2394                 break;
2395         case CHIP_FIJI:
2396         case CHIP_VEGAM:
2397                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2400                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2401                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2404                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2405                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2408                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2409                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2412                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2413                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2414                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2416                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2417                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2418                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2420                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2421                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2424                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2425                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2427                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2428                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2429                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2430                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2431                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2432                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2434                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2436                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2439                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2443                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2444                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2447                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2448                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2451                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2456                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2463                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2464                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2467                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2468                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2471                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2472                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2475                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2476                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2477                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2478                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2479                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2480                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2481                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2482                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2483                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2484                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2485                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2486                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2487                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2488                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2489                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2490                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2491                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2492                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2493                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2494                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2495                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2496                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2497                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2498                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2499                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2500                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2501                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2502                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2503                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2504                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2505                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2506                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2507                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2509                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2510                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2511                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2512                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2513                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2514                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2515                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2516                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2517                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2518                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2519
2520                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2523                                 NUM_BANKS(ADDR_SURF_8_BANK));
2524                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2526                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2527                                 NUM_BANKS(ADDR_SURF_8_BANK));
2528                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2530                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2531                                 NUM_BANKS(ADDR_SURF_8_BANK));
2532                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2534                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2535                                 NUM_BANKS(ADDR_SURF_8_BANK));
2536                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2538                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2539                                 NUM_BANKS(ADDR_SURF_8_BANK));
2540                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2542                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2543                                 NUM_BANKS(ADDR_SURF_8_BANK));
2544                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2546                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2547                                 NUM_BANKS(ADDR_SURF_8_BANK));
2548                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2550                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2551                                 NUM_BANKS(ADDR_SURF_8_BANK));
2552                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2554                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2555                                 NUM_BANKS(ADDR_SURF_8_BANK));
2556                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2559                                  NUM_BANKS(ADDR_SURF_8_BANK));
2560                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2562                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2563                                  NUM_BANKS(ADDR_SURF_8_BANK));
2564                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2566                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2567                                  NUM_BANKS(ADDR_SURF_8_BANK));
2568                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2570                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2571                                  NUM_BANKS(ADDR_SURF_8_BANK));
2572                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2573                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2574                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2575                                  NUM_BANKS(ADDR_SURF_4_BANK));
2576
2577                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2578                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2579
2580                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2581                         if (reg_offset != 7)
2582                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2583
2584                 break;
2585         case CHIP_TONGA:
                /*
                 * Tonga tiling-mode table.
                 *
                 * modearray[0..30] hold the GB_TILE_MODE register values:
                 * entries 0..7 are depth/stencil modes differing in
                 * TILE_SPLIT (64B..2KB), entry 8 is linear-aligned, 9..12
                 * display modes, 13..17 thin (color) modes, 18..26
                 * thick/x-thick (3D) modes, 27..30 rotated modes.  Most
                 * entries use the 8-pipe ADDR_SURF_P8_32x32_16x16 config;
                 * the PRT alternates (7, 12, 17, 23, 30) use P4_16x16.
                 * NOTE(review): the exact encodings presumably come from
                 * AMD's addrlib tables for this ASIC — do not hand-edit
                 * individual fields.
                 */
2586                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2587                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2589                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2590                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2593                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2594                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2597                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2598                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2599                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2601                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2602                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2603                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2605                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2606                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2607                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2609                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2610                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2611                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2613                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2614                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2615                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2616                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2617                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                /* Entry 8: linear-aligned — no tile split / micro-tile fields. */
2618                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2619                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2620                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2621                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2622                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2623                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2624                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2626                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2627                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2629                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2630                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2631                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2632                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2633                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2634                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2635                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2636                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2638                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2639                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2640                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2642                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2643                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2645                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2646                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2647                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2648                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2649                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2650                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2651                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2652                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2653                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2654                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2655                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2656                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2657                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2658                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2659                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2660                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2661                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2662                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2663                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2664                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2665                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2666                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2667                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2668                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2669                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2670                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2671                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2672                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2673                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2674                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2675                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2676                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2677                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2678                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2679                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2680                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2681                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2682                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2683                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2684                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2685                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2686                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2687                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2688                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2689                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2690                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2691                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2692                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2694                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2695                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2696                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2697                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2698                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2699                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2700                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2701                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2702                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2703                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2704                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2705                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2707                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2708
                /*
                 * mod2array[0..14] hold the GB_MACROTILE_MODE register
                 * values (bank width/height, macro-tile aspect ratio and
                 * bank count).  Index 7 is deliberately never initialized
                 * here — the write loop below skips it.
                 */
2709                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2711                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2712                                 NUM_BANKS(ADDR_SURF_16_BANK));
2713                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2715                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2716                                 NUM_BANKS(ADDR_SURF_16_BANK));
2717                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2718                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2719                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2720                                 NUM_BANKS(ADDR_SURF_16_BANK));
2721                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2723                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2724                                 NUM_BANKS(ADDR_SURF_16_BANK));
2725                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2727                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2728                                 NUM_BANKS(ADDR_SURF_16_BANK));
2729                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2730                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2731                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2732                                 NUM_BANKS(ADDR_SURF_16_BANK));
2733                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2735                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2736                                 NUM_BANKS(ADDR_SURF_16_BANK));
2737                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2739                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2740                                 NUM_BANKS(ADDR_SURF_16_BANK));
2741                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2742                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2743                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2744                                 NUM_BANKS(ADDR_SURF_16_BANK));
2745                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2747                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2748                                  NUM_BANKS(ADDR_SURF_16_BANK));
2749                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2751                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2752                                  NUM_BANKS(ADDR_SURF_16_BANK));
2753                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2755                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2756                                  NUM_BANKS(ADDR_SURF_8_BANK));
2757                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2758                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2759                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2760                                  NUM_BANKS(ADDR_SURF_4_BANK));
2761                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2763                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2764                                  NUM_BANKS(ADDR_SURF_4_BANK));
2765
                /* Program every GB_TILE_MODEn register from the table above. */
2766                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2767                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2768
                /*
                 * Program the GB_MACROTILE_MODEn registers, skipping index
                 * 7, for which no value was set above (mod2array[7] is
                 * uninitialized — writing it would program garbage).
                 */
2769                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2770                         if (reg_offset != 7)
2771                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2772
2773                 break;
2774         case CHIP_POLARIS11:
2775         case CHIP_POLARIS12:
2776                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2777                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2779                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2780                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2783                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2784                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2787                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2788                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2791                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2792                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2793                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2795                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2796                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2797                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2798                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2799                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2800                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2801                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2802                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2803                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2804                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2805                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2807                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2808                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2809                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2810                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2811                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2812                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2813                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2814                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2815                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2816                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2817                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2818                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2819                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2820                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2821                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2822                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2823                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2825                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2826                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2827                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2828                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2829                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2830                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2831                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2832                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2833                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2834                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2835                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2837                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2838                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2839                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2840                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2841                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2842                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2843                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2844                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2845                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2846                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2847                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2849                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2850                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2851                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2852                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2853                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2854                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2855                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2856                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2857                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2858                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2859                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2860                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2861                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2862                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2863                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2864                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2865                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2866                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2867                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2868                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2869                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2870                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2871                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2872                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2873                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2874                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2875                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2876                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2877                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2878                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2879                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2881                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2882                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2883                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2884                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2885                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2886                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2887                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2888                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2889                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2890                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2891                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2892                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2893                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2894                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2895                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2896                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2897                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2898
2899                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2900                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2901                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2902                                 NUM_BANKS(ADDR_SURF_16_BANK));
2903
2904                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2905                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2906                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2907                                 NUM_BANKS(ADDR_SURF_16_BANK));
2908
2909                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2910                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2911                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2912                                 NUM_BANKS(ADDR_SURF_16_BANK));
2913
2914                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2915                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2916                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917                                 NUM_BANKS(ADDR_SURF_16_BANK));
2918
2919                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2920                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2921                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2922                                 NUM_BANKS(ADDR_SURF_16_BANK));
2923
2924                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2925                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2926                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2927                                 NUM_BANKS(ADDR_SURF_16_BANK));
2928
2929                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2930                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2931                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2932                                 NUM_BANKS(ADDR_SURF_16_BANK));
2933
2934                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2935                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2936                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2937                                 NUM_BANKS(ADDR_SURF_16_BANK));
2938
2939                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2940                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2941                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2942                                 NUM_BANKS(ADDR_SURF_16_BANK));
2943
2944                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2946                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2947                                 NUM_BANKS(ADDR_SURF_16_BANK));
2948
2949                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2950                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2951                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2952                                 NUM_BANKS(ADDR_SURF_16_BANK));
2953
2954                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2955                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2956                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2957                                 NUM_BANKS(ADDR_SURF_16_BANK));
2958
2959                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2961                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2962                                 NUM_BANKS(ADDR_SURF_8_BANK));
2963
2964                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2966                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2967                                 NUM_BANKS(ADDR_SURF_4_BANK));
2968
2969                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2970                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2971
2972                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2973                         if (reg_offset != 7)
2974                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2975
2976                 break;
2977         case CHIP_POLARIS10:
2978                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2979                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2981                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2982                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2983                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2985                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2986                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2987                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2988                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2989                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2990                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2991                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2992                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2993                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2994                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2995                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2996                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2997                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2998                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2999                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3000                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3001                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3002                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3003                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3004                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3005                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3006                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3007                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3008                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3009                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3010                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3011                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3012                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3013                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3014                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3015                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3016                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3017                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3018                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3019                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3020                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3021                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3022                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3023                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3024                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3025                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3026                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3027                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3028                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3029                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3030                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3031                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3032                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3033                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3034                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3035                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3036                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3037                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3038                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3039                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3040                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3041                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3042                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3043                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3044                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3045                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3046                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3047                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3048                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3049                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3050                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3051                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3052                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3053                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3054                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3055                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3056                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3057                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3058                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3059                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3060                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3061                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3062                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3063                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3064                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3065                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3066                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3067                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3068                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3069                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3070                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3071                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3072                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3073                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3074                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3075                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3076                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3077                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3078                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3079                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3080                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3081                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3082                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3083                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3084                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3085                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3086                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3087                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3088                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3089                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3090                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3091                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3092                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3093                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3094                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3095                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3096                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3097                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3098                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3099                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3100
3101                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3102                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3103                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3104                                 NUM_BANKS(ADDR_SURF_16_BANK));
3105
3106                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3107                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3108                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3109                                 NUM_BANKS(ADDR_SURF_16_BANK));
3110
3111                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3112                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3113                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3114                                 NUM_BANKS(ADDR_SURF_16_BANK));
3115
3116                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3117                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3118                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3119                                 NUM_BANKS(ADDR_SURF_16_BANK));
3120
3121                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3122                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3123                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3124                                 NUM_BANKS(ADDR_SURF_16_BANK));
3125
3126                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3127                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3128                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3129                                 NUM_BANKS(ADDR_SURF_16_BANK));
3130
3131                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3132                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3133                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3134                                 NUM_BANKS(ADDR_SURF_16_BANK));
3135
3136                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3137                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3138                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3139                                 NUM_BANKS(ADDR_SURF_16_BANK));
3140
3141                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3142                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3143                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3144                                 NUM_BANKS(ADDR_SURF_16_BANK));
3145
3146                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3147                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3148                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3149                                 NUM_BANKS(ADDR_SURF_16_BANK));
3150
3151                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3152                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3153                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3154                                 NUM_BANKS(ADDR_SURF_16_BANK));
3155
3156                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3157                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3158                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3159                                 NUM_BANKS(ADDR_SURF_8_BANK));
3160
3161                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3162                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3163                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3164                                 NUM_BANKS(ADDR_SURF_4_BANK));
3165
3166                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3167                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3168                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3169                                 NUM_BANKS(ADDR_SURF_4_BANK));
3170
3171                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3172                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3173
3174                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3175                         if (reg_offset != 7)
3176                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3177
3178                 break;
3179         case CHIP_STONEY:
3180                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3181                                 PIPE_CONFIG(ADDR_SURF_P2) |
3182                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3183                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3184                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3185                                 PIPE_CONFIG(ADDR_SURF_P2) |
3186                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3187                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3188                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3189                                 PIPE_CONFIG(ADDR_SURF_P2) |
3190                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3191                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3192                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3193                                 PIPE_CONFIG(ADDR_SURF_P2) |
3194                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3195                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3196                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3197                                 PIPE_CONFIG(ADDR_SURF_P2) |
3198                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3199                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3200                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3201                                 PIPE_CONFIG(ADDR_SURF_P2) |
3202                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3203                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3204                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3205                                 PIPE_CONFIG(ADDR_SURF_P2) |
3206                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3207                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3208                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3209                                 PIPE_CONFIG(ADDR_SURF_P2));
3210                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3211                                 PIPE_CONFIG(ADDR_SURF_P2) |
3212                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3213                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3214                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3215                                  PIPE_CONFIG(ADDR_SURF_P2) |
3216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3218                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3219                                  PIPE_CONFIG(ADDR_SURF_P2) |
3220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3222                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3223                                  PIPE_CONFIG(ADDR_SURF_P2) |
3224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3226                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3227                                  PIPE_CONFIG(ADDR_SURF_P2) |
3228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3230                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3231                                  PIPE_CONFIG(ADDR_SURF_P2) |
3232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3234                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3235                                  PIPE_CONFIG(ADDR_SURF_P2) |
3236                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3237                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3238                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3239                                  PIPE_CONFIG(ADDR_SURF_P2) |
3240                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3241                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3242                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3243                                  PIPE_CONFIG(ADDR_SURF_P2) |
3244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3246                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3247                                  PIPE_CONFIG(ADDR_SURF_P2) |
3248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3250                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3251                                  PIPE_CONFIG(ADDR_SURF_P2) |
3252                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3253                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3254                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3255                                  PIPE_CONFIG(ADDR_SURF_P2) |
3256                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3257                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3258                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3259                                  PIPE_CONFIG(ADDR_SURF_P2) |
3260                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3261                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3262                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3263                                  PIPE_CONFIG(ADDR_SURF_P2) |
3264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3266                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3267                                  PIPE_CONFIG(ADDR_SURF_P2) |
3268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3270                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3271                                  PIPE_CONFIG(ADDR_SURF_P2) |
3272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3274                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3275                                  PIPE_CONFIG(ADDR_SURF_P2) |
3276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3278                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3279                                  PIPE_CONFIG(ADDR_SURF_P2) |
3280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3282
3283                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3284                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3285                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3286                                 NUM_BANKS(ADDR_SURF_8_BANK));
3287                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3288                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3289                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3290                                 NUM_BANKS(ADDR_SURF_8_BANK));
3291                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3292                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3293                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3294                                 NUM_BANKS(ADDR_SURF_8_BANK));
3295                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3296                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3297                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3298                                 NUM_BANKS(ADDR_SURF_8_BANK));
3299                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3300                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3301                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3302                                 NUM_BANKS(ADDR_SURF_8_BANK));
3303                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3304                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3305                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3306                                 NUM_BANKS(ADDR_SURF_8_BANK));
3307                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3308                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3309                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3310                                 NUM_BANKS(ADDR_SURF_8_BANK));
3311                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3312                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3313                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3314                                 NUM_BANKS(ADDR_SURF_16_BANK));
3315                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3316                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3317                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3318                                 NUM_BANKS(ADDR_SURF_16_BANK));
3319                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3320                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3321                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3322                                  NUM_BANKS(ADDR_SURF_16_BANK));
3323                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3324                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3325                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3326                                  NUM_BANKS(ADDR_SURF_16_BANK));
3327                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3328                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3329                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3330                                  NUM_BANKS(ADDR_SURF_16_BANK));
3331                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3332                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3333                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3334                                  NUM_BANKS(ADDR_SURF_16_BANK));
3335                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3336                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3337                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3338                                  NUM_BANKS(ADDR_SURF_8_BANK));
3339
3340                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3341                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3342                             reg_offset != 23)
3343                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3344
3345                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3346                         if (reg_offset != 7)
3347                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3348
3349                 break;
3350         default:
3351                 dev_warn(adev->dev,
3352                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3353                          adev->asic_type);
3354
3355         case CHIP_CARRIZO:
3356                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3357                                 PIPE_CONFIG(ADDR_SURF_P2) |
3358                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3359                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3360                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3361                                 PIPE_CONFIG(ADDR_SURF_P2) |
3362                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3363                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3364                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3365                                 PIPE_CONFIG(ADDR_SURF_P2) |
3366                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3367                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3368                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3369                                 PIPE_CONFIG(ADDR_SURF_P2) |
3370                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3371                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3372                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3373                                 PIPE_CONFIG(ADDR_SURF_P2) |
3374                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3375                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3376                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3377                                 PIPE_CONFIG(ADDR_SURF_P2) |
3378                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3379                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3380                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3381                                 PIPE_CONFIG(ADDR_SURF_P2) |
3382                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3383                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3384                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3385                                 PIPE_CONFIG(ADDR_SURF_P2));
3386                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3387                                 PIPE_CONFIG(ADDR_SURF_P2) |
3388                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3389                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3390                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3391                                  PIPE_CONFIG(ADDR_SURF_P2) |
3392                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3393                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3394                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3395                                  PIPE_CONFIG(ADDR_SURF_P2) |
3396                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3397                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3398                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3399                                  PIPE_CONFIG(ADDR_SURF_P2) |
3400                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3401                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3402                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3403                                  PIPE_CONFIG(ADDR_SURF_P2) |
3404                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3405                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3406                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3407                                  PIPE_CONFIG(ADDR_SURF_P2) |
3408                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3409                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3410                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3411                                  PIPE_CONFIG(ADDR_SURF_P2) |
3412                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3413                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3414                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3415                                  PIPE_CONFIG(ADDR_SURF_P2) |
3416                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3417                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3418                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3419                                  PIPE_CONFIG(ADDR_SURF_P2) |
3420                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3421                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3422                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3423                                  PIPE_CONFIG(ADDR_SURF_P2) |
3424                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3425                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3426                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3427                                  PIPE_CONFIG(ADDR_SURF_P2) |
3428                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3429                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3430                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3431                                  PIPE_CONFIG(ADDR_SURF_P2) |
3432                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3433                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3434                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3435                                  PIPE_CONFIG(ADDR_SURF_P2) |
3436                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3437                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3438                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3439                                  PIPE_CONFIG(ADDR_SURF_P2) |
3440                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3441                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3442                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3443                                  PIPE_CONFIG(ADDR_SURF_P2) |
3444                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3445                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3446                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3447                                  PIPE_CONFIG(ADDR_SURF_P2) |
3448                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3449                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3450                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3451                                  PIPE_CONFIG(ADDR_SURF_P2) |
3452                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3453                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3454                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3455                                  PIPE_CONFIG(ADDR_SURF_P2) |
3456                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3457                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3458
3459                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3460                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3461                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3462                                 NUM_BANKS(ADDR_SURF_8_BANK));
3463                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3464                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3465                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3466                                 NUM_BANKS(ADDR_SURF_8_BANK));
3467                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3468                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3469                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3470                                 NUM_BANKS(ADDR_SURF_8_BANK));
3471                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3472                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3473                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3474                                 NUM_BANKS(ADDR_SURF_8_BANK));
3475                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3476                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3477                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3478                                 NUM_BANKS(ADDR_SURF_8_BANK));
3479                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3480                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3481                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3482                                 NUM_BANKS(ADDR_SURF_8_BANK));
3483                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3484                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3485                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3486                                 NUM_BANKS(ADDR_SURF_8_BANK));
3487                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3488                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3489                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3490                                 NUM_BANKS(ADDR_SURF_16_BANK));
3491                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3492                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3493                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3494                                 NUM_BANKS(ADDR_SURF_16_BANK));
3495                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3496                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3497                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3498                                  NUM_BANKS(ADDR_SURF_16_BANK));
3499                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3500                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3501                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3502                                  NUM_BANKS(ADDR_SURF_16_BANK));
3503                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3504                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3505                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3506                                  NUM_BANKS(ADDR_SURF_16_BANK));
3507                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3508                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3509                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3510                                  NUM_BANKS(ADDR_SURF_16_BANK));
3511                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3512                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3513                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3514                                  NUM_BANKS(ADDR_SURF_8_BANK));
3515
3516                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3517                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3518                             reg_offset != 23)
3519                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3520
3521                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3522                         if (reg_offset != 7)
3523                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3524
3525                 break;
3526         }
3527 }
3528
3529 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3530                                   u32 se_num, u32 sh_num, u32 instance)
3531 {
3532         u32 data;
3533
3534         if (instance == 0xffffffff)
3535                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3536         else
3537                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3538
3539         if (se_num == 0xffffffff)
3540                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3541         else
3542                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3543
3544         if (sh_num == 0xffffffff)
3545                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3546         else
3547                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3548
3549         WREG32(mmGRBM_GFX_INDEX, data);
3550 }
3551
/*
 * Select a specific micro-engine / pipe / queue for subsequent register
 * accesses by delegating to the SRBM selector (VMID 0).
 */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
                                  u32 me, u32 pipe, u32 q)
{
        vi_srbm_select(adev, me, pipe, q, 0);
}
3557
3558 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3559 {
3560         u32 data, mask;
3561
3562         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3563                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3564
3565         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3566
3567         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3568                                          adev->gfx.config.max_sh_per_se);
3569
3570         return (~data) & mask;
3571 }
3572
3573 static void
3574 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3575 {
3576         switch (adev->asic_type) {
3577         case CHIP_FIJI:
3578         case CHIP_VEGAM:
3579                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3580                           RB_XSEL2(1) | PKR_MAP(2) |
3581                           PKR_XSEL(1) | PKR_YSEL(1) |
3582                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3583                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3584                            SE_PAIR_YSEL(2);
3585                 break;
3586         case CHIP_TONGA:
3587         case CHIP_POLARIS10:
3588                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3589                           SE_XSEL(1) | SE_YSEL(1);
3590                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3591                            SE_PAIR_YSEL(2);
3592                 break;
3593         case CHIP_TOPAZ:
3594         case CHIP_CARRIZO:
3595                 *rconf |= RB_MAP_PKR0(2);
3596                 *rconf1 |= 0x0;
3597                 break;
3598         case CHIP_POLARIS11:
3599         case CHIP_POLARIS12:
3600                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3601                           SE_XSEL(1) | SE_YSEL(1);
3602                 *rconf1 |= 0x0;
3603                 break;
3604         case CHIP_STONEY:
3605                 *rconf |= 0x0;
3606                 *rconf1 |= 0x0;
3607                 break;
3608         default:
3609                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3610                 break;
3611         }
3612 }
3613
/*
 * Rewrite the raster configuration for parts with harvested (disabled)
 * render backends.  For each shader engine the SE/PKR/RB mapping fields
 * are patched so rasterizer work is only routed to RBs present in
 * rb_mask, then the adjusted value is written via a per-SE
 * GRBM_GFX_INDEX selection.
 *
 * @raster_config/@raster_config_1: baseline values from
 *	gfx_v8_0_raster_config()
 * @rb_mask: bitmap of enabled RBs (global numbering across all SEs)
 * @num_rb: number of RB "pipes" being considered
 *
 * Caller must hold grbm_idx_mutex (gfx_v8_0_setup_rb does).
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	/* max_t/min_t guards keep the divisions below well-defined. */
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice the global RB mask into one contiguous chunk per SE. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* The mapping code below only supports these topologies. */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If a whole SE pair has no RBs, repoint SE_PAIR_MAP at the live pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx = first SE of the pair this SE belongs to */
		int idx = (se / 2) * 2;

		/* One SE of the pair is empty: route to the populated one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea at packer granularity within this SE. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Finally patch the RB maps inside each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* Second packer's RB pair. */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3722
/*
 * Discover which render backends are active, program the raster
 * configuration (writing per-SE harvested configs if some RBs are
 * fused off), and cache the resulting register values so they can be
 * reported to userspace.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	/* Bits of the global RB bitmap contributed by each SE/SH slot. */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	/* Serialize GRBM_GFX_INDEX selection against other users. */
	mutex_lock(&adev->grbm_idx_mutex);
	/* Build a global active-RB bitmap by probing every SE/SH. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* Back to broadcast mode. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/*
	 * Fully populated (or no RBs at all): broadcast the baseline
	 * config.  Otherwise patch the maps around the harvested RBs.
	 */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3779
/**
 * gfx_v8_0_init_compute_vmid - init the compute VMIDs' memory apertures
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
/* Aperture base replicated into both halves of SH_MEM_BASES below. */
#define DEFAULT_SH_MEM_BASES    (0x6000)
/* VMIDs [FIRST, LAST) are set up here for compute use. */
#define FIRST_COMPUTE_VMID      (8)
#define LAST_COMPUTE_VMID       (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
        int i;
        uint32_t sh_mem_config;
        uint32_t sh_mem_bases;

        /*
         * Configure apertures:
         * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
         * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
         * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
         */
        sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

        /* HSA64 address mode, unaligned access, MTYPE_CC default, private ATC. */
        sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
                        SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
                        SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
                        SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
                        MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
                        SH_MEM_CONFIG__PRIVATE_ATC_MASK;

        /* srbm_mutex serializes SRBM VMID selection with other users. */
        mutex_lock(&adev->srbm_mutex);
        for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
                vi_srbm_select(adev, 0, 0, 0, i);
                /* CP and shaders */
                WREG32(mmSH_MEM_CONFIG, sh_mem_config);
                /* APE1 base (1) > limit (0) leaves that aperture unused. */
                WREG32(mmSH_MEM_APE1_BASE, 1);
                WREG32(mmSH_MEM_APE1_LIMIT, 0);
                WREG32(mmSH_MEM_BASES, sh_mem_bases);
        }
        /* Restore VMID 0 selection. */
        vi_srbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);
}
3824
3825 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3826 {
3827         switch (adev->asic_type) {
3828         default:
3829                 adev->gfx.config.double_offchip_lds_buf = 1;
3830                 break;
3831         case CHIP_CARRIZO:
3832         case CHIP_STONEY:
3833                 adev->gfx.config.double_offchip_lds_buf = 0;
3834                 break;
3835         }
3836 }
3837
/*
 * One-time GFX golden/constant state programming: address config,
 * tiling tables, RB/raster setup, CU info, per-VMID SH_MEM registers,
 * and broadcast PA_SC/SPI defaults.
 */
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	/* Mirror the GB address config into the HDP and DMIF blocks. */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	/* Program SH_MEM_CONFIG/BASES for every VMID via SRBM selection. */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel/GART): uncached default MTYPE, base 0. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* Other VMIDs: non-coherent default, shared-aperture base. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* APE1 base (1) > limit (0) leaves that aperture unused. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* Give all four pipe-order timestamps equal arbitration priority. */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3919
/*
 * Busy-wait (up to adev->usec_timeout microseconds per unit) until the
 * RLC serdes masters report idle: first the per-CU masters on every
 * SE/SH, then the non-CU (SE/GC/TC) masters.  On a per-SE/SH timeout,
 * logs and returns early after restoring broadcast selection.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* Restore broadcast before bailing out. */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Now wait for the non-CU serdes masters (no lock needed). */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3957
3958 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3959                                                bool enable)
3960 {
3961         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3962
3963         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3964         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3965         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3966         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3967
3968         WREG32(mmCP_INT_CNTL_RING0, tmp);
3969 }
3970
3971 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3972 {
3973         /* csib */
3974         WREG32(mmRLC_CSIB_ADDR_HI,
3975                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3976         WREG32(mmRLC_CSIB_ADDR_LO,
3977                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3978         WREG32(mmRLC_CSIB_LENGTH,
3979                         adev->gfx.rlc.clear_state_size);
3980 }
3981
/*
 * Walk the RLC indirect register list starting at @ind_offset and rewrite
 * each entry's index reference in place so that it refers to a slot in
 * @unique_indices rather than to the raw index value.
 *
 * Entries are variable length and terminated by a 0xFFFFFFFF marker dword.
 * The start offset of every entry is recorded in @ind_start_offsets and
 * every distinct index value encountered is collected (deduplicated) into
 * @unique_indices.  @indices_count and @offset_count are in/out running
 * totals; @max_indices/@max_offset bound the destination arrays (overflow
 * is a hard BUG — the firmware-provided list must fit).
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		/* record where this entry starts */
		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* end-of-entry marker: the next dword starts a new entry */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip two dwords to reach the index dword of this record
		 * (presumably a register offset/count pair precedes it —
		 * TODO confirm against the RLC list format) */
		ind_offset += 2;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append to the unique table */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value with its slot number */
		register_list_format[ind_offset] = indices;
	}
}
4031
/*
 * Build and upload the RLC save/restore lists:
 *  - the direct register restore list goes into the SRM ARAM,
 *  - the (index-rewritten) indirect format list goes into GPM scratch,
 *  - per-entry start offsets and the deduplicated index registers are
 *    programmed so the RLC can resolve indexed entries at runtime.
 *
 * Returns 0 on success or -ENOMEM if the scratch copy of the format list
 * cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: gfx_v8_0_parse_ind_reg_list() rewrites the index
	 * references in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list: stream the direct restore list into the
	 * SRM ARAM with auto-incrementing addressing */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* restore list size is halved — presumably counted in dword pairs;
	 * TODO confirm against the RLC spec */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices: ADDR_0..7 / DATA_0..7 are consecutive registers.
	 * NOTE(review): the value is split as low 18 bits -> ADDR and bits
	 * 20+ -> DATA; confirm the 0x3FFFF / >>20 split against the
	 * register spec. */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4095
4096 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4097 {
4098         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4099 }
4100
/*
 * Program the RLC power-gating timing parameters: power-up/power-down,
 * command-propagation and memory-sleep delays, the serdes command delay
 * and the auto-PG GFX idle threshold.  The constants are hardware tuning
 * values — presumably in RLC clock units; do not change without a
 * reference to the programming guide.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	/* how long the CP polls the RB write pointer while idle */
	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4117
4118 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4119                                                 bool enable)
4120 {
4121         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4122 }
4123
4124 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4125                                                   bool enable)
4126 {
4127         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4128 }
4129
4130 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4131 {
4132         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4133 }
4134
4135 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4136 {
4137         if ((adev->asic_type == CHIP_CARRIZO) ||
4138             (adev->asic_type == CHIP_STONEY)) {
4139                 gfx_v8_0_init_csb(adev);
4140                 gfx_v8_0_init_save_restore_list(adev);
4141                 gfx_v8_0_enable_save_restore_machine(adev);
4142                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4143                 gfx_v8_0_init_power_gating(adev);
4144                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4145         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4146                    (adev->asic_type == CHIP_POLARIS12) ||
4147                    (adev->asic_type == CHIP_VEGAM)) {
4148                 gfx_v8_0_init_csb(adev);
4149                 gfx_v8_0_init_save_restore_list(adev);
4150                 gfx_v8_0_enable_save_restore_machine(adev);
4151                 gfx_v8_0_init_power_gating(adev);
4152         }
4153
4154 }
4155
4156 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4157 {
4158         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4159
4160         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4161         gfx_v8_0_wait_for_rlc_serdes(adev);
4162 }
4163
4164 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4165 {
4166         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4167         udelay(50);
4168
4169         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4170         udelay(50);
4171 }
4172
4173 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4174 {
4175         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4176
4177         /* carrizo do enable cp interrupt after cp inited */
4178         if (!(adev->flags & AMD_IS_APU))
4179                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4180
4181         udelay(50);
4182 }
4183
4184 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4185 {
4186         const struct rlc_firmware_header_v2_0 *hdr;
4187         const __le32 *fw_data;
4188         unsigned i, fw_size;
4189
4190         if (!adev->gfx.rlc_fw)
4191                 return -EINVAL;
4192
4193         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4194         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4195
4196         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4197                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4198         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4199
4200         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4201         for (i = 0; i < fw_size; i++)
4202                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4203         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4204
4205         return 0;
4206 }
4207
4208 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4209 {
4210         int r;
4211
4212         gfx_v8_0_rlc_stop(adev);
4213         gfx_v8_0_rlc_reset(adev);
4214         gfx_v8_0_init_pg(adev);
4215
4216         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4217                 /* legacy rlc firmware loading */
4218                 r = gfx_v8_0_rlc_load_microcode(adev);
4219                 if (r)
4220                         return r;
4221         }
4222
4223         gfx_v8_0_rlc_start(adev);
4224
4225         return 0;
4226 }
4227
4228 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4229 {
4230         int i;
4231         u32 tmp = RREG32(mmCP_ME_CNTL);
4232
4233         if (enable) {
4234                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4235                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4236                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4237         } else {
4238                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4239                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4240                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4241                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4242                         adev->gfx.gfx_ring[i].ready = false;
4243         }
4244         WREG32(mmCP_ME_CNTL, tmp);
4245         udelay(50);
4246 }
4247
4248 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4249 {
4250         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4251         const struct gfx_firmware_header_v1_0 *ce_hdr;
4252         const struct gfx_firmware_header_v1_0 *me_hdr;
4253         const __le32 *fw_data;
4254         unsigned i, fw_size;
4255
4256         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4257                 return -EINVAL;
4258
4259         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4260                 adev->gfx.pfp_fw->data;
4261         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4262                 adev->gfx.ce_fw->data;
4263         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4264                 adev->gfx.me_fw->data;
4265
4266         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4267         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4268         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4269
4270         gfx_v8_0_cp_gfx_enable(adev, false);
4271
4272         /* PFP */
4273         fw_data = (const __le32 *)
4274                 (adev->gfx.pfp_fw->data +
4275                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4276         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4277         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4278         for (i = 0; i < fw_size; i++)
4279                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4280         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4281
4282         /* CE */
4283         fw_data = (const __le32 *)
4284                 (adev->gfx.ce_fw->data +
4285                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4286         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4287         WREG32(mmCP_CE_UCODE_ADDR, 0);
4288         for (i = 0; i < fw_size; i++)
4289                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4290         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4291
4292         /* ME */
4293         fw_data = (const __le32 *)
4294                 (adev->gfx.me_fw->data +
4295                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4296         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4297         WREG32(mmCP_ME_RAM_WADDR, 0);
4298         for (i = 0; i < fw_size; i++)
4299                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4300         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4301
4302         return 0;
4303 }
4304
4305 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4306 {
4307         u32 count = 0;
4308         const struct cs_section_def *sect = NULL;
4309         const struct cs_extent_def *ext = NULL;
4310
4311         /* begin clear state */
4312         count += 2;
4313         /* context control state */
4314         count += 3;
4315
4316         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4317                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4318                         if (sect->id == SECT_CONTEXT)
4319                                 count += 2 + ext->reg_count;
4320                         else
4321                                 return 0;
4322                 }
4323         }
4324         /* pa_sc_raster_config/pa_sc_raster_config1 */
4325         count += 4;
4326         /* end clear state */
4327         count += 2;
4328         /* clear state */
4329         count += 2;
4330
4331         return count;
4332 }
4333
/*
 * Program the CP global state, un-halt the gfx engines and emit the
 * clear-state sequence plus the CE partition setup on gfx ring 0.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* ring space: gfx_v8_0_get_csb_size() plus the 4-dword SET_BASE
	 * packet appended at the end */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* NOTE(review): 0x80000000 appears to be the load-enable bit of
	 * both CONTEXT_CONTROL dwords — confirm against the PM4 spec */
	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* replay every SECT_CONTEXT extent from vi_cs_data */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* raster config values come from the stored RB configuration */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/*
 * Configure the CP gfx ring doorbell: enable or disable it for @ring
 * and, on dGPUs, program the doorbell aperture range routed to the CP.
 * Iceland (Topaz) has no gfx doorbells at all.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* the doorbell range registers below are dGPU-only */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4430
/*
 * Bring up gfx ring 0: program the ring buffer size, read/write pointers
 * and writeback addresses, set up the doorbell, then start the ring and
 * run a ring test.  The statement order follows the hardware bring-up
 * sequence and should not be rearranged.
 *
 * Returns 0 on success or the ring-test error (ring marked not ready).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers; RPTR_WR_ENA
	 * is set temporarily so the wptr reset takes effect */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* brief settle time before restoring the final CNTL value */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base address is in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4488
4489 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4490 {
4491         int i;
4492
4493         if (enable) {
4494                 WREG32(mmCP_MEC_CNTL, 0);
4495         } else {
4496                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4497                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4498                         adev->gfx.compute_ring[i].ready = false;
4499                 adev->gfx.kiq.ring.ready = false;
4500         }
4501         udelay(50);
4502 }
4503
/*
 * Load the MEC1 (and, when present, MEC2) compute microcode.  The MEC
 * engines are halted first; callers restart them later.
 *
 * Returns 0 on success, -EINVAL if the MEC1 image is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1: stream the image, then leave the fw version in the address
	 * register */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4549
4550 /* KIQ functions */
/*
 * Tell the RLC which me/pipe/queue is the KIQ.  The queue id is written
 * first, then re-written with bit 7 set — presumably an enable/valid
 * bit that must be raised in a second write; confirm against the
 * RLC_CP_SCHEDULERS register spec.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4564
/*
 * Use the KIQ to bring up all enabled compute queues (KCQs): emit one
 * SET_RESOURCES packet describing the queue mask, then a MAP_QUEUES
 * packet per compute ring, and run a KIQ ring test to confirm the
 * packets were consumed.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	/* build a bitmask of the compute queues owned by the KFD/KGD split */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	/* 8 dwords per MAP_QUEUES + 8 for SET_RESOURCES */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		/* the ME field counts MEC engines from 0, hence me-1 */
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	r = amdgpu_ring_test_ring(kiq_ring);
	if (r) {
		DRM_ERROR("KCQ enable failed\n");
		kiq_ring->ready = false;
	}
	return r;
}
4628
/*
 * Ask the currently selected HQD to dequeue and wait (up to
 * adev->usec_timeout microseconds) for it to go inactive, then clear the
 * dequeue request and the queue read/write pointers unconditionally.
 *
 * @req: dequeue request type written to CP_HQD_DEQUEUE_REQUEST.
 *
 * NOTE(review): assumes the caller has already selected the target
 * me/pipe/queue (e.g. via srbm select) — confirm at the call sites.
 *
 * Returns 0 on success, -ETIMEDOUT if the queue stays active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4649
4650 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4651 {
4652         struct amdgpu_device *adev = ring->adev;
4653         struct vi_mqd *mqd = ring->mqd_ptr;
4654         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4655         uint32_t tmp;
4656
4657         mqd->header = 0xC0310800;
4658         mqd->compute_pipelinestat_enable = 0x00000001;
4659         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4660         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4661         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4662         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4663         mqd->compute_misc_reserved = 0x00000003;
4664         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4665                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4666         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4667                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4668         eop_base_addr = ring->eop_gpu_addr >> 8;
4669         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4670         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4671
4672         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4673         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4674         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4675                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4676
4677         mqd->cp_hqd_eop_control = tmp;
4678
4679         /* enable doorbell? */
4680         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4681                             CP_HQD_PQ_DOORBELL_CONTROL,
4682                             DOORBELL_EN,
4683                             ring->use_doorbell ? 1 : 0);
4684
4685         mqd->cp_hqd_pq_doorbell_control = tmp;
4686
4687         /* set the pointer to the MQD */
4688         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4689         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4690
4691         /* set MQD vmid to 0 */
4692         tmp = RREG32(mmCP_MQD_CONTROL);
4693         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4694         mqd->cp_mqd_control = tmp;
4695
4696         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4697         hqd_gpu_addr = ring->gpu_addr >> 8;
4698         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4699         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4700
4701         /* set up the HQD, this is similar to CP_RB0_CNTL */
4702         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4703         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4704                             (order_base_2(ring->ring_size / 4) - 1));
4705         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4706                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4707 #ifdef __BIG_ENDIAN
4708         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4709 #endif
4710         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4711         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4712         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4713         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4714         mqd->cp_hqd_pq_control = tmp;
4715
4716         /* set the wb address whether it's enabled or not */
4717         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4718         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4719         mqd->cp_hqd_pq_rptr_report_addr_hi =
4720                 upper_32_bits(wb_gpu_addr) & 0xffff;
4721
4722         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4723         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4724         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4725         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4726
4727         tmp = 0;
4728         /* enable the doorbell if requested */
4729         if (ring->use_doorbell) {
4730                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4731                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4732                                 DOORBELL_OFFSET, ring->doorbell_index);
4733
4734                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4735                                          DOORBELL_EN, 1);
4736                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4737                                          DOORBELL_SOURCE, 0);
4738                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4739                                          DOORBELL_HIT, 0);
4740         }
4741
4742         mqd->cp_hqd_pq_doorbell_control = tmp;
4743
4744         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4745         ring->wptr = 0;
4746         mqd->cp_hqd_pq_wptr = ring->wptr;
4747         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4748
4749         /* set the vmid for the queue */
4750         mqd->cp_hqd_vmid = 0;
4751
4752         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4753         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4754         mqd->cp_hqd_persistent_state = tmp;
4755
4756         /* set MTYPE */
4757         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4758         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4759         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4760         mqd->cp_hqd_ib_control = tmp;
4761
4762         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4763         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4764         mqd->cp_hqd_iq_timer = tmp;
4765
4766         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4767         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4768         mqd->cp_hqd_ctx_save_control = tmp;
4769
4770         /* defaults */
4771         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4772         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4773         mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4774         mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4775         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4776         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4777         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4778         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4779         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4780         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4781         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4782         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4783         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4784         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4785         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4786
4787         /* activate the queue */
4788         mqd->cp_hqd_active = 1;
4789
4790         return 0;
4791 }
4792
/*
 * gfx_v8_0_mqd_commit - program a prepared MQD image into the HQD registers
 *
 * @adev: amdgpu device
 * @mqd:  MQD image; its fields starting at cp_mqd_base_addr_lo mirror the
 *        register block mmCP_MQD_BASE_ADDR..mmCP_HQD_ERROR, so registers are
 *        written by indexing into the struct as a u32 array.
 *
 * NOTE(review): callers in this file hold srbm_mutex and select the target
 * me/pipe/queue via vi_srbm_select() before calling - confirm that contract
 * holds before adding new call sites.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
                        struct vi_mqd *mqd)
{
        uint32_t mqd_reg;
        uint32_t *mqd_data;

        /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
        mqd_data = &mqd->cp_mqd_base_addr_lo;

        /* disable wptr polling */
        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

        /* program all HQD registers */
        for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
         * This is safe since EOP RPTR==WPTR for any inactive HQD
         * on ASICs that do not support context-save.
         * EOP writes/reads can start anywhere in the ring.
         */
        if (adev->asic_type != CHIP_TONGA) {
                WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
                WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
                WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
        }

        for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* activate the HQD - mmCP_HQD_ACTIVE is the upper bound of this loop,
         * so the activation bit is the last register written */
        for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        return 0;
}
4829
4830 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4831 {
4832         struct amdgpu_device *adev = ring->adev;
4833         struct vi_mqd *mqd = ring->mqd_ptr;
4834         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4835
4836         gfx_v8_0_kiq_setting(ring);
4837
4838         if (adev->in_gpu_reset) { /* for GPU_RESET case */
4839                 /* reset MQD to a clean status */
4840                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4841                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4842
4843                 /* reset ring buffer */
4844                 ring->wptr = 0;
4845                 amdgpu_ring_clear_ring(ring);
4846                 mutex_lock(&adev->srbm_mutex);
4847                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4848                 gfx_v8_0_mqd_commit(adev, mqd);
4849                 vi_srbm_select(adev, 0, 0, 0, 0);
4850                 mutex_unlock(&adev->srbm_mutex);
4851         } else {
4852                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4853                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4854                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4855                 mutex_lock(&adev->srbm_mutex);
4856                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4857                 gfx_v8_0_mqd_init(ring);
4858                 gfx_v8_0_mqd_commit(adev, mqd);
4859                 vi_srbm_select(adev, 0, 0, 0, 0);
4860                 mutex_unlock(&adev->srbm_mutex);
4861
4862                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4863                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4864         }
4865
4866         return 0;
4867 }
4868
4869 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4870 {
4871         struct amdgpu_device *adev = ring->adev;
4872         struct vi_mqd *mqd = ring->mqd_ptr;
4873         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4874
4875         if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
4876                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4877                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4878                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4879                 mutex_lock(&adev->srbm_mutex);
4880                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4881                 gfx_v8_0_mqd_init(ring);
4882                 vi_srbm_select(adev, 0, 0, 0, 0);
4883                 mutex_unlock(&adev->srbm_mutex);
4884
4885                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4886                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4887         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4888                 /* reset MQD to a clean status */
4889                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4890                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4891                 /* reset ring buffer */
4892                 ring->wptr = 0;
4893                 amdgpu_ring_clear_ring(ring);
4894         } else {
4895                 amdgpu_ring_clear_ring(ring);
4896         }
4897         return 0;
4898 }
4899
/* Program the MEC doorbell aperture (KIQ..MEC_RING7) and turn on CP doorbell
 * processing.
 * NOTE(review): the range registers are only written for ASICs newer than
 * Tonga - presumably they do not exist or are owned elsewhere on older
 * chips; confirm before extending.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
        if (adev->asic_type > CHIP_TONGA) {
                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
        }
        /* enable doorbells */
        WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4909
4910 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4911 {
4912         struct amdgpu_ring *ring;
4913         int r;
4914
4915         ring = &adev->gfx.kiq.ring;
4916
4917         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4918         if (unlikely(r != 0))
4919                 return r;
4920
4921         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4922         if (unlikely(r != 0))
4923                 return r;
4924
4925         gfx_v8_0_kiq_init_queue(ring);
4926         amdgpu_bo_kunmap(ring->mqd_obj);
4927         ring->mqd_ptr = NULL;
4928         amdgpu_bo_unreserve(ring->mqd_obj);
4929         ring->ready = true;
4930         return 0;
4931 }
4932
/* Bring up all kernel compute queues (KCQs): enable the MEC, initialize each
 * ring's MQD, program the doorbell aperture, have the KIQ map the queues,
 * then ring-test each KCQ.
 */
static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = NULL;
        int r = 0, i;

        gfx_v8_0_cp_compute_enable(adev, true);

        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                ring = &adev->gfx.compute_ring[i];

                r = amdgpu_bo_reserve(ring->mqd_obj, false);
                if (unlikely(r != 0))
                        goto done;
                r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
                if (!r) {
                        r = gfx_v8_0_kcq_init_queue(ring);
                        amdgpu_bo_kunmap(ring->mqd_obj);
                        ring->mqd_ptr = NULL;
                }
                amdgpu_bo_unreserve(ring->mqd_obj);
                if (r)
                        goto done;
        }

        gfx_v8_0_set_mec_doorbell_range(adev);

        /* ask the KIQ to map all KCQs onto hardware queue slots */
        r = gfx_v8_0_kiq_kcq_enable(adev);
        if (r)
                goto done;

        /* Test KCQs - ready must be set before the test, since the test
         * submits work to the ring; clear it again if the test fails */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                ring = &adev->gfx.compute_ring[i];
                ring->ready = true;
                r = amdgpu_ring_test_ring(ring);
                if (r)
                        ring->ready = false;
        }

done:
        return r;
}
4975
/* Resume the command processor: load CP microcode when firmware is loaded
 * directly by the driver, then bring up KIQ, the GFX ring and the KCQs in
 * that order.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
        int r;

        if (!(adev->flags & AMD_IS_APU))
                gfx_v8_0_enable_gui_idle_interrupt(adev, false);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
                /* legacy firmware loading */
                r = gfx_v8_0_cp_gfx_load_microcode(adev);
                if (r)
                        return r;

                r = gfx_v8_0_cp_compute_load_microcode(adev);
                if (r)
                        return r;
        }

        /* KIQ must be up before the compute queues it will map */
        r = gfx_v8_0_kiq_resume(adev);
        if (r)
                return r;

        r = gfx_v8_0_cp_gfx_resume(adev);
        if (r)
                return r;

        r = gfx_v8_0_kcq_resume(adev);
        if (r)
                return r;
        gfx_v8_0_enable_gui_idle_interrupt(adev, true);

        return 0;
}
5009
/* Enable or disable both CP engines (GFX and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
        gfx_v8_0_cp_gfx_enable(adev, enable);
        gfx_v8_0_cp_compute_enable(adev, enable);
}
5015
/* IP-block hw_init hook: program golden registers and GFX constants, then
 * bring up the RLC followed by the command processor.
 */
static int gfx_v8_0_hw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        gfx_v8_0_init_golden_registers(adev);
        gfx_v8_0_constants_init(adev);

        r = gfx_v8_0_rlc_resume(adev);
        if (r)
                return r;

        return gfx_v8_0_cp_resume(adev);
}
5032
5033 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
5034 {
5035         int r, i;
5036         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
5037
5038         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
5039         if (r)
5040                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5041
5042         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5043                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5044
5045                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5046                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5047                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5048                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5049                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5050                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5051                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5052                 amdgpu_ring_write(kiq_ring, 0);
5053                 amdgpu_ring_write(kiq_ring, 0);
5054                 amdgpu_ring_write(kiq_ring, 0);
5055         }
5056         r = amdgpu_ring_test_ring(kiq_ring);
5057         if (r)
5058                 DRM_ERROR("KCQ disable failed\n");
5059
5060         return r;
5061 }
5062
5063 static bool gfx_v8_0_is_idle(void *handle)
5064 {
5065         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5066
5067         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
5068                 || RREG32(mmGRBM_STATUS2) != 0x8)
5069                 return false;
5070         else
5071                 return true;
5072 }
5073
5074 static bool gfx_v8_0_rlc_is_idle(void *handle)
5075 {
5076         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5077
5078         if (RREG32(mmGRBM_STATUS2) != 0x8)
5079                 return false;
5080         else
5081                 return true;
5082 }
5083
5084 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
5085 {
5086         unsigned int i;
5087         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5088
5089         for (i = 0; i < adev->usec_timeout; i++) {
5090                 if (gfx_v8_0_rlc_is_idle(handle))
5091                         return 0;
5092
5093                 udelay(1);
5094         }
5095         return -ETIMEDOUT;
5096 }
5097
5098 static int gfx_v8_0_wait_for_idle(void *handle)
5099 {
5100         unsigned int i;
5101         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5102
5103         for (i = 0; i < adev->usec_timeout; i++) {
5104                 if (gfx_v8_0_is_idle(handle))
5105                         return 0;
5106
5107                 udelay(1);
5108         }
5109         return -ETIMEDOUT;
5110 }
5111
/* IP-block hw_fini hook: quiesce interrupt sources, unmap the compute
 * queues, then halt the CP and RLC from within RLC safe mode.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* drop interrupt references before tearing down the queues */
        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

        amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

        amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

        /* disable KCQ to avoid CPC touch memory not valid anymore */
        gfx_v8_0_kcq_disable(adev);

        if (amdgpu_sriov_vf(adev)) {
                /* under SR-IOV the host owns CP/RLC teardown, not the guest */
                pr_debug("For SRIOV client, shouldn't do anything.\n");
                return 0;
        }
        /* halt CP/RLC inside RLC safe mode; if either block fails to drain
         * in time, skip the halt rather than touch a busy block */
        adev->gfx.rlc.funcs->enter_safe_mode(adev);
        if (!gfx_v8_0_wait_for_idle(adev))
                gfx_v8_0_cp_enable(adev, false);
        else
                pr_err("cp is busy, skip halt cp\n");
        if (!gfx_v8_0_wait_for_rlc_idle(adev))
                gfx_v8_0_rlc_stop(adev);
        else
                pr_err("rlc is busy, skip halt rlc\n");
        adev->gfx.rlc.funcs->exit_safe_mode(adev);
        return 0;
}
5142
5143 static int gfx_v8_0_suspend(void *handle)
5144 {
5145         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5146         adev->gfx.in_suspend = true;
5147         return gfx_v8_0_hw_fini(adev);
5148 }
5149
5150 static int gfx_v8_0_resume(void *handle)
5151 {
5152         int r;
5153         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5154
5155         r = gfx_v8_0_hw_init(adev);
5156         adev->gfx.in_suspend = false;
5157         return r;
5158 }
5159
/* Inspect GRBM/SRBM status registers and, when any engine reports stuck
 * busy, record the GRBM/SRBM soft-reset masks in adev->gfx for the
 * pre/soft/post reset hooks to consume. Returns true when a reset is needed.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        /* GRBM_STATUS: any busy graphics engine implies resetting CP+GFX */
        tmp = RREG32(mmGRBM_STATUS);
        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        }

        /* GRBM_STATUS2: RLC busy -> reset RLC */
        tmp = RREG32(mmGRBM_STATUS2);
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

        /* any busy CP micro-engine (fetcher/compute/gfx) -> reset them all */
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPF, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPC, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPG, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
                                                SOFT_RESET_GRBM, 1);
        }

        /* SRBM_STATUS */
        tmp = RREG32(mmSRBM_STATUS);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

        /* publish (or clear) the masks for the reset hooks */
        if (grbm_soft_reset || srbm_soft_reset) {
                adev->gfx.grbm_soft_reset = grbm_soft_reset;
                adev->gfx.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->gfx.grbm_soft_reset = 0;
                adev->gfx.srbm_soft_reset = 0;
                return false;
        }
}
5221
/* Prepare for soft reset: stop the RLC, halt GFX parsing when the CP/GFX
 * blocks will be reset, and deactivate every compute HQD when any compute
 * CP block will be reset. No-op when check_soft_reset recorded nothing.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        /* stop the rlc */
        gfx_v8_0_rlc_stop(adev);

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
                /* Disable GFX parsing/prefetching */
                gfx_v8_0_cp_gfx_enable(adev, false);

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
                int i;

                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                        /* select each queue via SRBM, then deactivate its HQD.
                         * NOTE(review): the '2' argument is presumably a
                         * retry/timeout count - confirm against
                         * gfx_v8_0_deactivate_hqd() */
                        mutex_lock(&adev->srbm_mutex);
                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                        gfx_v8_0_deactivate_hqd(adev, 2);
                        vi_srbm_select(adev, 0, 0, 0, 0);
                        mutex_unlock(&adev->srbm_mutex);
                }
                /* Disable MEC parsing/prefetching */
                gfx_v8_0_cp_compute_enable(adev, false);
        }

       return 0;
}
5263
/* Apply the GRBM/SRBM soft resets recorded by gfx_v8_0_check_soft_reset().
 * GFX memory clients are stalled (GMCON_DEBUG GFX_STALL/GFX_CLEAR) around
 * the reset pulse; each reset bit is asserted, held ~50us, then deasserted,
 * with read-backs to post the writes.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        /* stall GFX memory clients while the blocks are reset */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
                WREG32(mmGMCON_DEBUG, tmp);
                udelay(50);
        }

        if (grbm_soft_reset) {
                /* assert the reset bits, hold, then deassert */
                tmp = RREG32(mmGRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~grbm_soft_reset;
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);
        }

        if (srbm_soft_reset) {
                /* same assert/hold/deassert sequence for the SRBM bits */
                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);
        }

        /* release the GFX memory client stall */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
                WREG32(mmGMCON_DEBUG, tmp);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        return 0;
}
5325
/* After soft reset: deactivate compute HQDs and resume KIQ/KCQ when the
 * compute CP blocks were reset, resume the GFX ring when the GFX CP was
 * reset, then restart the RLC.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
                int i;

                /* make sure every HQD is inactive before re-initializing */
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                        mutex_lock(&adev->srbm_mutex);
                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                        gfx_v8_0_deactivate_hqd(adev, 2);
                        vi_srbm_select(adev, 0, 0, 0, 0);
                        mutex_unlock(&adev->srbm_mutex);
                }
                gfx_v8_0_kiq_resume(adev);
                gfx_v8_0_kcq_resume(adev);
        }

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
                gfx_v8_0_cp_gfx_resume(adev);

        gfx_v8_0_rlc_start(adev);

        return 0;
}
5365
5366 /**
5367  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5368  *
5369  * @adev: amdgpu_device pointer
5370  *
5371  * Fetches a GPU clock counter snapshot.
5372  * Returns the 64 bit clock counter snapshot.
5373  */
5374 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5375 {
5376         uint64_t clock;
5377
5378         mutex_lock(&adev->gfx.gpu_clock_mutex);
5379         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5380         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5381                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5382         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5383         return clock;
5384 }
5385
5386 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5387                                           uint32_t vmid,
5388                                           uint32_t gds_base, uint32_t gds_size,
5389                                           uint32_t gws_base, uint32_t gws_size,
5390                                           uint32_t oa_base, uint32_t oa_size)
5391 {
5392         /* GDS Base */
5393         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5394         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5395                                 WRITE_DATA_DST_SEL(0)));
5396         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5397         amdgpu_ring_write(ring, 0);
5398         amdgpu_ring_write(ring, gds_base);
5399
5400         /* GDS Size */
5401         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5402         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5403                                 WRITE_DATA_DST_SEL(0)));
5404         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5405         amdgpu_ring_write(ring, 0);
5406         amdgpu_ring_write(ring, gds_size);
5407
5408         /* GWS */
5409         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5410         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5411                                 WRITE_DATA_DST_SEL(0)));
5412         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5413         amdgpu_ring_write(ring, 0);
5414         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5415
5416         /* OA */
5417         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5418         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5419                                 WRITE_DATA_DST_SEL(0)));
5420         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5421         amdgpu_ring_write(ring, 0);
5422         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5423 }
5424
5425 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5426 {
5427         WREG32(mmSQ_IND_INDEX,
5428                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5429                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5430                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5431                 (SQ_IND_INDEX__FORCE_READ_MASK));
5432         return RREG32(mmSQ_IND_DATA);
5433 }
5434
5435 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5436                            uint32_t wave, uint32_t thread,
5437                            uint32_t regno, uint32_t num, uint32_t *out)
5438 {
5439         WREG32(mmSQ_IND_INDEX,
5440                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5441                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5442                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5443                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5444                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5445                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5446         while (num--)
5447                 *(out++) = RREG32(mmSQ_IND_DATA);
5448 }
5449
5450 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5451 {
5452         /* type 0 wave data */
5453         dst[(*no_fields)++] = 0;
5454         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5455         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5456         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5457         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5458         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5459         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5460         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5461         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5462         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5463         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5464         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5465         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5466         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5467         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5468         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5469         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5470         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5471         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5472 }
5473
5474 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5475                                      uint32_t wave, uint32_t start,
5476                                      uint32_t size, uint32_t *dst)
5477 {
5478         wave_read_regs(
5479                 adev, simd, wave, 0,
5480                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5481 }
5482
5483
/* GFX v8 callback table handed to the common amdgpu_gfx layer
 * (assigned to adev->gfx.funcs in gfx_v8_0_early_init()). */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5491
5492 static int gfx_v8_0_early_init(void *handle)
5493 {
5494         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5495
5496         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5497         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5498         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5499         gfx_v8_0_set_ring_funcs(adev);
5500         gfx_v8_0_set_irq_funcs(adev);
5501         gfx_v8_0_set_gds_init(adev);
5502         gfx_v8_0_set_rlc_funcs(adev);
5503
5504         return 0;
5505 }
5506
/**
 * gfx_v8_0_late_init - late init for the GFX v8 IP block
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Enables the privileged-register, privileged-instruction, CP EDC
 * error and SQ interrupt sources, and runs the EDC GPR workaround
 * (which submits IBs, so it must run after the IB pool exists).
 *
 * NOTE(review): a mid-sequence failure returns without releasing the
 * IRQ references already taken; presumably the caller's teardown path
 * balances them with amdgpu_irq_put() - confirm before adding local
 * unwinding, or the puts would be doubled.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	return 0;
}
5541
5542 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5543                                                        bool enable)
5544 {
5545         if (((adev->asic_type == CHIP_POLARIS11) ||
5546             (adev->asic_type == CHIP_POLARIS12) ||
5547             (adev->asic_type == CHIP_VEGAM)) &&
5548             adev->powerplay.pp_funcs->set_powergating_by_smu)
5549                 /* Send msg to SMU via Powerplay */
5550                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5551
5552         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5553 }
5554
/* Toggle dynamic per-CU power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5560
/* Toggle "quick" medium-grain power gating via RLC_PG_CNTL
 * (Polaris-family feature; see AMD_PG_SUPPORT_GFX_QUICK_MG users). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5566
/* Toggle coarse-grain GFX power gating via RLC_PG_CNTL (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5572
5573 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5574                                                 bool enable)
5575 {
5576         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5577
5578         /* Read any GFX register to wake up GFX. */
5579         if (!enable)
5580                 RREG32(mmDB_RENDER_CONTROL);
5581 }
5582
5583 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5584                                           bool enable)
5585 {
5586         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5587                 cz_enable_gfx_cg_power_gating(adev, true);
5588                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5589                         cz_enable_gfx_pipeline_power_gating(adev, true);
5590         } else {
5591                 cz_enable_gfx_cg_power_gating(adev, false);
5592                 cz_enable_gfx_pipeline_power_gating(adev, false);
5593         }
5594 }
5595
/**
 * gfx_v8_0_set_powergating_state - program GFX power gating
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 * @state: AMD_PG_STATE_GATE to enable power gating, otherwise ungate
 *
 * Brackets the per-ASIC PG programming in RLC safe mode whenever any
 * of the affected features is supported.  Returns 0 immediately under
 * SR-IOV (no PG programming is done for a VF).
 *
 * Returns 0.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down tracks RLC_SMU_HS support, not @enable. */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		/* CP power gating also tracks the support flag only. */
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		/* Static/dynamic per-CU gating follow @enable only when
		 * the corresponding feature is supported. */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->exit_safe_mode(adev);
	return 0;
}
5666
5667 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5668 {
5669         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5670         int data;
5671
5672         if (amdgpu_sriov_vf(adev))
5673                 *flags = 0;
5674
5675         /* AMD_CG_SUPPORT_GFX_MGCG */
5676         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5677         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5678                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5679
5680         /* AMD_CG_SUPPORT_GFX_CGLG */
5681         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5682         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5683                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5684
5685         /* AMD_CG_SUPPORT_GFX_CGLS */
5686         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5687                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5688
5689         /* AMD_CG_SUPPORT_GFX_CGTS */
5690         data = RREG32(mmCGTS_SM_CTRL_REG);
5691         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5692                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5693
5694         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5695         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5696                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5697
5698         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5699         data = RREG32(mmRLC_MEM_SLP_CNTL);
5700         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5701                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5702
5703         /* AMD_CG_SUPPORT_GFX_CP_LS */
5704         data = RREG32(mmCP_MEM_SLP_CNTL);
5705         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5706                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5707 }
5708
5709 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5710                                      uint32_t reg_addr, uint32_t cmd)
5711 {
5712         uint32_t data;
5713
5714         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5715
5716         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5717         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5718
5719         data = RREG32(mmRLC_SERDES_WR_CTRL);
5720         if (adev->asic_type == CHIP_STONEY)
5721                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5722                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5723                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5724                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5725                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5726                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5727                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5728                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5729                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5730         else
5731                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5732                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5733                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5734                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5735                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5736                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5737                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5738                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5739                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5740                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5741                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5742         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5743                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5744                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5745                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5746
5747         WREG32(mmRLC_SERDES_WR_CTRL, data);
5748 }
5749
/* RLC safe-mode handshake message codes, plus local definitions of the
 * RLC_GPR_REG2 REQ/MESSAGE bit-fields (defined here, presumably
 * because the generated register headers lack them - not referenced
 * by the code visible in this region). */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5756
/**
 * iceland_enter_rlc_safe_mode - request RLC safe mode and wait for it
 * @adev: amdgpu device pointer
 *
 * No-op when the RLC F32 core is disabled, or when neither CGCG nor
 * MGCG is supported.  Otherwise writes RLC_SAFE_MODE with CMD set and
 * MESSAGE=1, polls RLC_GPM_STAT until both GFX clock and power report
 * active, then polls until the CMD bit clears (the ack), and records
 * the state in adev->gfx.rlc.in_safe_mode.
 *
 * NOTE(review): the RLC_SAFE_MODE write is composed on top of the
 * value read from RLC_CNTL, so unrelated RLC_CNTL bits land in the
 * write - verify this is intentional before restructuring.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* nothing to do if the RLC F32 core is not running */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clocks and power to report active */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the CMD bit to clear (request acknowledged) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5790
5791 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5792 {
5793         u32 data = 0;
5794         unsigned i;
5795
5796         data = RREG32(mmRLC_CNTL);
5797         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5798                 return;
5799
5800         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5801                 if (adev->gfx.rlc.in_safe_mode) {
5802                         data |= RLC_SAFE_MODE__CMD_MASK;
5803                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5804                         WREG32(mmRLC_SAFE_MODE, data);
5805                         adev->gfx.rlc.in_safe_mode = false;
5806                 }
5807         }
5808
5809         for (i = 0; i < adev->usec_timeout; i++) {
5810                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5811                         break;
5812                 udelay(1);
5813         }
5814 }
5815
/* RLC safe-mode enter/exit callbacks (installed via
 * gfx_v8_0_set_rlc_funcs(), called from early init). */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5820
/**
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 * @adev: amdgpu device pointer
 * @enable: true to enable medium-grain clock gating, false to disable
 *
 * Programs RLC/CP memory light sleep, the RLC MGCG override bits and
 * the CGTS (tree-shade) controls, propagating overrides to the BPMs
 * via serdes commands.  The whole sequence runs inside RLC safe mode;
 * the numbered comments mirror the original programming order, which
 * should not be rearranged.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit set; dGPUs clear it too */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5924
/**
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse-grain clock gating, false to disable
 *
 * Programs the CGCG/CGLS enables in RLC_CGCG_CGLS_CTRL and the
 * matching override bits in RLC_CGTT_MGCG_OVERRIDE, propagating the
 * changes to the BPMs with serdes commands.  Runs inside RLC safe
 * mode; keep the original statement order.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override in RLC_CGTT_MGCG_OVERRIDE */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* and clear the CGLS override */
			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6017 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6018                                             bool enable)
6019 {
6020         if (enable) {
6021                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6022                  * ===  MGCG + MGLS + TS(CG/LS) ===
6023                  */
6024                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6025                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6026         } else {
6027                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6028                  * ===  CGCG + CGLS ===
6029                  */
6030                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6031                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6032         }
6033         return 0;
6034 }
6035
6036 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6037                                           enum amd_clockgating_state state)
6038 {
6039         uint32_t msg_id, pp_state = 0;
6040         uint32_t pp_support_state = 0;
6041
6042         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6043                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6044                         pp_support_state = PP_STATE_SUPPORT_LS;
6045                         pp_state = PP_STATE_LS;
6046                 }
6047                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6048                         pp_support_state |= PP_STATE_SUPPORT_CG;
6049                         pp_state |= PP_STATE_CG;
6050                 }
6051                 if (state == AMD_CG_STATE_UNGATE)
6052                         pp_state = 0;
6053
6054                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6055                                 PP_BLOCK_GFX_CG,
6056                                 pp_support_state,
6057                                 pp_state);
6058                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6059                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6060         }
6061
6062         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6063                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6064                         pp_support_state = PP_STATE_SUPPORT_LS;
6065                         pp_state = PP_STATE_LS;
6066                 }
6067
6068                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6069                         pp_support_state |= PP_STATE_SUPPORT_CG;
6070                         pp_state |= PP_STATE_CG;
6071                 }
6072
6073                 if (state == AMD_CG_STATE_UNGATE)
6074                         pp_state = 0;
6075
6076                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6077                                 PP_BLOCK_GFX_MG,
6078                                 pp_support_state,
6079                                 pp_state);
6080                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6081                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6082         }
6083
6084         return 0;
6085 }
6086
6087 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6088                                           enum amd_clockgating_state state)
6089 {
6090
6091         uint32_t msg_id, pp_state = 0;
6092         uint32_t pp_support_state = 0;
6093
6094         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6095                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6096                         pp_support_state = PP_STATE_SUPPORT_LS;
6097                         pp_state = PP_STATE_LS;
6098                 }
6099                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6100                         pp_support_state |= PP_STATE_SUPPORT_CG;
6101                         pp_state |= PP_STATE_CG;
6102                 }
6103                 if (state == AMD_CG_STATE_UNGATE)
6104                         pp_state = 0;
6105
6106                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6107                                 PP_BLOCK_GFX_CG,
6108                                 pp_support_state,
6109                                 pp_state);
6110                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6111                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6112         }
6113
6114         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6115                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6116                         pp_support_state = PP_STATE_SUPPORT_LS;
6117                         pp_state = PP_STATE_LS;
6118                 }
6119                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6120                         pp_support_state |= PP_STATE_SUPPORT_CG;
6121                         pp_state |= PP_STATE_CG;
6122                 }
6123                 if (state == AMD_CG_STATE_UNGATE)
6124                         pp_state = 0;
6125
6126                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6127                                 PP_BLOCK_GFX_3D,
6128                                 pp_support_state,
6129                                 pp_state);
6130                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6131                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6132         }
6133
6134         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6135                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6136                         pp_support_state = PP_STATE_SUPPORT_LS;
6137                         pp_state = PP_STATE_LS;
6138                 }
6139
6140                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6141                         pp_support_state |= PP_STATE_SUPPORT_CG;
6142                         pp_state |= PP_STATE_CG;
6143                 }
6144
6145                 if (state == AMD_CG_STATE_UNGATE)
6146                         pp_state = 0;
6147
6148                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6149                                 PP_BLOCK_GFX_MG,
6150                                 pp_support_state,
6151                                 pp_state);
6152                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6153                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6154         }
6155
6156         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6157                 pp_support_state = PP_STATE_SUPPORT_LS;
6158
6159                 if (state == AMD_CG_STATE_UNGATE)
6160                         pp_state = 0;
6161                 else
6162                         pp_state = PP_STATE_LS;
6163
6164                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6165                                 PP_BLOCK_GFX_RLC,
6166                                 pp_support_state,
6167                                 pp_state);
6168                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6169                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6170         }
6171
6172         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6173                 pp_support_state = PP_STATE_SUPPORT_LS;
6174
6175                 if (state == AMD_CG_STATE_UNGATE)
6176                         pp_state = 0;
6177                 else
6178                         pp_state = PP_STATE_LS;
6179                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6180                         PP_BLOCK_GFX_CP,
6181                         pp_support_state,
6182                         pp_state);
6183                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6184                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6185         }
6186
6187         return 0;
6188 }
6189
6190 static int gfx_v8_0_set_clockgating_state(void *handle,
6191                                           enum amd_clockgating_state state)
6192 {
6193         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6194
6195         if (amdgpu_sriov_vf(adev))
6196                 return 0;
6197
6198         switch (adev->asic_type) {
6199         case CHIP_FIJI:
6200         case CHIP_CARRIZO:
6201         case CHIP_STONEY:
6202                 gfx_v8_0_update_gfx_clock_gating(adev,
6203                                                  state == AMD_CG_STATE_GATE);
6204                 break;
6205         case CHIP_TONGA:
6206                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6207                 break;
6208         case CHIP_POLARIS10:
6209         case CHIP_POLARIS11:
6210         case CHIP_POLARIS12:
6211         case CHIP_VEGAM:
6212                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6213                 break;
6214         default:
6215                 break;
6216         }
6217         return 0;
6218 }
6219
6220 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6221 {
6222         return ring->adev->wb.wb[ring->rptr_offs];
6223 }
6224
6225 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6226 {
6227         struct amdgpu_device *adev = ring->adev;
6228
6229         if (ring->use_doorbell)
6230                 /* XXX check if swapping is necessary on BE */
6231                 return ring->adev->wb.wb[ring->wptr_offs];
6232         else
6233                 return RREG32(mmCP_RB0_WPTR);
6234 }
6235
6236 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6237 {
6238         struct amdgpu_device *adev = ring->adev;
6239
6240         if (ring->use_doorbell) {
6241                 /* XXX check if swapping is necessary on BE */
6242                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6243                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6244         } else {
6245                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6246                 (void)RREG32(mmCP_RB0_WPTR);
6247         }
6248 }
6249
/*
 * Emit a packet that flushes the HDP cache and waits for completion.
 *
 * The flush is requested via GPU_HDP_FLUSH_REQ and the WAIT_REG_MEM
 * packet polls GPU_HDP_FLUSH_DONE until the done bit for this CP is set.
 * Each ME/pipe has its own done bit, hence the per-ring mask selection.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		/* compute/KIQ: done bit depends on which MEC and pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* only MEC1/MEC2 rings are expected here */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask); /* reference value */
	amdgpu_ring_write(ring, ref_and_mask); /* compare mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6282
/*
 * Emit two EVENT_WRITE packets: a VS partial flush followed by a VGT
 * flush.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	/* flush outstanding vertex shader work first */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6293
/*
 * Emit an indirect buffer on the gfx ring.
 *
 * CE IBs are emitted with INDIRECT_BUFFER_CONST, DE IBs with
 * INDIRECT_BUFFER.  Under SR-IOV, preemptible IBs get the PRE_ENB bit
 * and DE IBs are preceded by a de-meta packet (presumably preemption
 * state — see gfx_v8_0_ring_emit_de_meta).
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vmid, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length in dwords, VMID in bits 24+ */
	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC)); /* IB base, dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6323
/*
 * Emit an indirect buffer on a compute ring (always INDIRECT_BUFFER,
 * marked VALID; no CE path on compute).
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vmid, bool ctx_switch)
{
	/* IB length in dwords, VMID in bits 24+ */
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC)); /* IB base, dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6339
/*
 * Emit a fence on the gfx ring: an EVENT_WRITE_EOP that flushes caches,
 * writes @seq to @addr and, when requested via @flags, raises an
 * interrupt.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL picks 32- vs 64-bit seq write; INT_SEL gates the IRQ */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6360
/*
 * Emit a WAIT_REG_MEM that blocks the ring until this ring's latest
 * fence value appears at its fence address.  Gfx rings wait on the PFP
 * so prefetch also stalls; compute rings wait on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq); /* reference value */
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6377
/*
 * Emit a TLB flush for @vmid and wait for it to complete by polling
 * VM_INVALIDATE_REQUEST back to zero.  On gfx rings a PFP_SYNC_ME
 * follows because the flush runs on the ME.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6403
6404 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6405 {
6406         return ring->adev->wb.wb[ring->wptr_offs];
6407 }
6408
6409 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6410 {
6411         struct amdgpu_device *adev = ring->adev;
6412
6413         /* XXX check if swapping is necessary on BE */
6414         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6415         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6416 }
6417
6418 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6419                                            bool acquire)
6420 {
6421         struct amdgpu_device *adev = ring->adev;
6422         int pipe_num, tmp, reg;
6423         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6424
6425         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6426
6427         /* first me only has 2 entries, GFX and HP3D */
6428         if (ring->me > 0)
6429                 pipe_num -= 2;
6430
6431         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6432         tmp = RREG32(reg);
6433         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6434         WREG32(reg, tmp);
6435 }
6436
6437 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6438                                             struct amdgpu_ring *ring,
6439                                             bool acquire)
6440 {
6441         int i, pipe;
6442         bool reserve;
6443         struct amdgpu_ring *iring;
6444
6445         mutex_lock(&adev->gfx.pipe_reserve_mutex);
6446         pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6447         if (acquire)
6448                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6449         else
6450                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6451
6452         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6453                 /* Clear all reservations - everyone reacquires all resources */
6454                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6455                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6456                                                        true);
6457
6458                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6459                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6460                                                        true);
6461         } else {
6462                 /* Lower all pipes without a current reservation */
6463                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6464                         iring = &adev->gfx.gfx_ring[i];
6465                         pipe = amdgpu_gfx_queue_to_bit(adev,
6466                                                        iring->me,
6467                                                        iring->pipe,
6468                                                        0);
6469                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6470                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6471                 }
6472
6473                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6474                         iring = &adev->gfx.compute_ring[i];
6475                         pipe = amdgpu_gfx_queue_to_bit(adev,
6476                                                        iring->me,
6477                                                        iring->pipe,
6478                                                        0);
6479                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6480                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6481                 }
6482         }
6483
6484         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6485 }
6486
/*
 * Program pipe/queue priority for the HQD backing @ring.  The queue must
 * be selected via SRBM first; srbm_mutex guards the shared select state,
 * and the select is restored to 0 before releasing it.
 */
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	/* acquire = elevated priority, otherwise lowest */
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
6503 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6504                                                enum drm_sched_priority priority)
6505 {
6506         struct amdgpu_device *adev = ring->adev;
6507         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6508
6509         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6510                 return;
6511
6512         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6513         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6514 }
6515
/*
 * Emit a fence on a compute ring: a RELEASE_MEM that flushes caches,
 * writes @seq to @addr and, when requested via @flags, raises an
 * interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL picks 32- vs 64-bit seq write; INT_SEL gates the IRQ */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6536
/*
 * Emit a fence on the KIQ ring: WRITE_DATA of the 32-bit @seq to @addr,
 * then an optional write to CPC_INT_STATUS to raise the interrupt.
 * 64-bit fences are not supported here (only 32 bits of writeback are
 * allocated per seq).
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6561
/* Emit a SWITCH_BUFFER packet. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6567
/*
 * Emit a CONTEXT_CONTROL packet selecting which state groups the CP
 * loads.  On a context switch the full set of load bits is requested
 * (after a VGT flush); CE RAM is additionally loaded when a preamble IB
 * is present.  Under SR-IOV a ce-meta packet precedes everything.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6600
/*
 * Emit a COND_EXEC packet whose dword-skip count is a placeholder to be
 * patched later by gfx_v8_0_ring_emit_patch_cond_exec().  Returns the
 * ring offset of the placeholder dword.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6613
/*
 * Patch the COND_EXEC placeholder written by
 * gfx_v8_0_ring_emit_init_cond_exec() with the number of dwords between
 * the placeholder and the current write pointer, handling the case where
 * the ring has wrapped since the placeholder was emitted.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped past the placeholder: add a full ring of dwords */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6627
/*
 * Emit a COPY_DATA packet that copies register @reg into the writeback
 * slot reserved for virtualization register reads
 * (adev->virt.reg_val_offs).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |     /* src: register*/
				(5 << 8) |      /* dst: memory */
				(1 << 20));     /* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6643
6644 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6645                                   uint32_t val)
6646 {
6647         uint32_t cmd;
6648
6649         switch (ring->funcs->type) {
6650         case AMDGPU_RING_TYPE_GFX:
6651                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6652                 break;
6653         case AMDGPU_RING_TYPE_KIQ:
6654                 cmd = 1 << 16; /* no inc addr */
6655                 break;
6656         default:
6657                 cmd = WR_CONFIRM;
6658                 break;
6659         }
6660
6661         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6662         amdgpu_ring_write(ring, cmd);
6663         amdgpu_ring_write(ring, reg);
6664         amdgpu_ring_write(ring, 0);
6665         amdgpu_ring_write(ring, val);
6666 }
6667
6668 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6669 {
6670         struct amdgpu_device *adev = ring->adev;
6671         uint32_t value = 0;
6672
6673         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6674         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6675         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6676         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6677         WREG32(mmSQ_CMD, value);
6678 }
6679
6680 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6681                                                  enum amdgpu_interrupt_state state)
6682 {
6683         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6684                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6685 }
6686
6687 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6688                                                      int me, int pipe,
6689                                                      enum amdgpu_interrupt_state state)
6690 {
6691         u32 mec_int_cntl, mec_int_cntl_reg;
6692
6693         /*
6694          * amdgpu controls only the first MEC. That's why this function only
6695          * handles the setting of interrupts for this specific MEC. All other
6696          * pipes' interrupts are set by amdkfd.
6697          */
6698
6699         if (me == 1) {
6700                 switch (pipe) {
6701                 case 0:
6702                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6703                         break;
6704                 case 1:
6705                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6706                         break;
6707                 case 2:
6708                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6709                         break;
6710                 case 3:
6711                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6712                         break;
6713                 default:
6714                         DRM_DEBUG("invalid pipe %d\n", pipe);
6715                         return;
6716                 }
6717         } else {
6718                 DRM_DEBUG("invalid me %d\n", me);
6719                 return;
6720         }
6721
6722         switch (state) {
6723         case AMDGPU_IRQ_STATE_DISABLE:
6724                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6725                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6726                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6727                 break;
6728         case AMDGPU_IRQ_STATE_ENABLE:
6729                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6730                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6731                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6732                 break;
6733         default:
6734                 break;
6735         }
6736 }
6737
6738 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6739                                              struct amdgpu_irq_src *source,
6740                                              unsigned type,
6741                                              enum amdgpu_interrupt_state state)
6742 {
6743         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6744                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6745
6746         return 0;
6747 }
6748
6749 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6750                                               struct amdgpu_irq_src *source,
6751                                               unsigned type,
6752                                               enum amdgpu_interrupt_state state)
6753 {
6754         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6755                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6756
6757         return 0;
6758 }
6759
6760 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6761                                             struct amdgpu_irq_src *src,
6762                                             unsigned type,
6763                                             enum amdgpu_interrupt_state state)
6764 {
6765         switch (type) {
6766         case AMDGPU_CP_IRQ_GFX_EOP:
6767                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6768                 break;
6769         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6770                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6771                 break;
6772         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6773                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6774                 break;
6775         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6776                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6777                 break;
6778         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6779                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6780                 break;
6781         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6782                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6783                 break;
6784         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6785                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6786                 break;
6787         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6788                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6789                 break;
6790         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6791                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6792                 break;
6793         default:
6794                 break;
6795         }
6796         return 0;
6797 }
6798
6799 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6800                                          struct amdgpu_irq_src *source,
6801                                          unsigned int type,
6802                                          enum amdgpu_interrupt_state state)
6803 {
6804         int enable_flag;
6805
6806         switch (state) {
6807         case AMDGPU_IRQ_STATE_DISABLE:
6808                 enable_flag = 0;
6809                 break;
6810
6811         case AMDGPU_IRQ_STATE_ENABLE:
6812                 enable_flag = 1;
6813                 break;
6814
6815         default:
6816                 return -EINVAL;
6817         }
6818
6819         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6820         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6821         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6822         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6823         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6824         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6825                      enable_flag);
6826         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6827                      enable_flag);
6828         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6829                      enable_flag);
6830         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6831                      enable_flag);
6832         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6833                      enable_flag);
6834         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6835                      enable_flag);
6836         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6837                      enable_flag);
6838         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6839                      enable_flag);
6840
6841         return 0;
6842 }
6843
6844 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6845                                      struct amdgpu_irq_src *source,
6846                                      unsigned int type,
6847                                      enum amdgpu_interrupt_state state)
6848 {
6849         int enable_flag;
6850
6851         switch (state) {
6852         case AMDGPU_IRQ_STATE_DISABLE:
6853                 enable_flag = 1;
6854                 break;
6855
6856         case AMDGPU_IRQ_STATE_ENABLE:
6857                 enable_flag = 0;
6858                 break;
6859
6860         default:
6861                 return -EINVAL;
6862         }
6863
6864         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6865                      enable_flag);
6866
6867         return 0;
6868 }
6869
6870 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6871                             struct amdgpu_irq_src *source,
6872                             struct amdgpu_iv_entry *entry)
6873 {
6874         int i;
6875         u8 me_id, pipe_id, queue_id;
6876         struct amdgpu_ring *ring;
6877
6878         DRM_DEBUG("IH: CP EOP\n");
6879         me_id = (entry->ring_id & 0x0c) >> 2;
6880         pipe_id = (entry->ring_id & 0x03) >> 0;
6881         queue_id = (entry->ring_id & 0x70) >> 4;
6882
6883         switch (me_id) {
6884         case 0:
6885                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6886                 break;
6887         case 1:
6888         case 2:
6889                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6890                         ring = &adev->gfx.compute_ring[i];
6891                         /* Per-queue interrupt is supported for MEC starting from VI.
6892                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6893                           */
6894                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6895                                 amdgpu_fence_process(ring);
6896                 }
6897                 break;
6898         }
6899         return 0;
6900 }
6901
/*
 * Handler for CP privileged-register faults: a command stream accessed a
 * register it is not permitted to touch.  The fault is unrecoverable in
 * place, so a GPU reset is scheduled.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6910
/*
 * Handler for CP privileged-instruction faults: an illegal instruction
 * was encountered in a command stream.  Schedule a GPU reset to recover.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6919
/*
 * Handler for CP EDC/ECC error interrupts.  The error is only logged;
 * no recovery is attempted here.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
                                     struct amdgpu_irq_src *source,
                                     struct amdgpu_iv_entry *entry)
{
        /* Kernel log messages must be newline terminated. */
        DRM_ERROR("CP EDC/ECC error detected.\n");
        return 0;
}
6927
6928 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6929 {
6930         u32 enc, se_id, sh_id, cu_id;
6931         char type[20];
6932         int sq_edc_source = -1;
6933
6934         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6935         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6936
6937         switch (enc) {
6938                 case 0:
6939                         DRM_INFO("SQ general purpose intr detected:"
6940                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6941                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6942                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6943                                         "wlt %d, thread_trace %d.\n",
6944                                         se_id,
6945                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6946                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6947                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6948                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6949                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6950                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6951                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6952                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6953                                         );
6954                         break;
6955                 case 1:
6956                 case 2:
6957
6958                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6959                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6960
6961                         /*
6962                          * This function can be called either directly from ISR
6963                          * or from BH in which case we can access SQ_EDC_INFO
6964                          * instance
6965                          */
6966                         if (in_task()) {
6967                                 mutex_lock(&adev->grbm_idx_mutex);
6968                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6969
6970                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6971
6972                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6973                                 mutex_unlock(&adev->grbm_idx_mutex);
6974                         }
6975
6976                         if (enc == 1)
6977                                 sprintf(type, "instruction intr");
6978                         else
6979                                 sprintf(type, "EDC/ECC error");
6980
6981                         DRM_INFO(
6982                                 "SQ %s detected: "
6983                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6984                                         "trap %s, sq_ed_info.source %s.\n",
6985                                         type, se_id, sh_id, cu_id,
6986                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6987                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6988                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6989                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6990                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6991                                 );
6992                         break;
6993                 default:
6994                         DRM_ERROR("SQ invalid encoding type\n.");
6995         }
6996 }
6997
6998 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6999 {
7000
7001         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
7002         struct sq_work *sq_work = container_of(work, struct sq_work, work);
7003
7004         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
7005 }
7006
7007 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
7008                            struct amdgpu_irq_src *source,
7009                            struct amdgpu_iv_entry *entry)
7010 {
7011         unsigned ih_data = entry->src_data[0];
7012
7013         /*
7014          * Try to submit work so SQ_EDC_INFO can be accessed from
7015          * BH. If previous work submission hasn't finished yet
7016          * just print whatever info is possible directly from the ISR.
7017          */
7018         if (work_pending(&adev->gfx.sq_work.work)) {
7019                 gfx_v8_0_parse_sq_irq(adev, ih_data);
7020         } else {
7021                 adev->gfx.sq_work.ih_data = ih_data;
7022                 schedule_work(&adev->gfx.sq_work.work);
7023         }
7024
7025         return 0;
7026 }
7027
7028 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
7029                                             struct amdgpu_irq_src *src,
7030                                             unsigned int type,
7031                                             enum amdgpu_interrupt_state state)
7032 {
7033         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7034
7035         switch (type) {
7036         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
7037                 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
7038                              state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7039                 if (ring->me == 1)
7040                         WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
7041                                      ring->pipe,
7042                                      GENERIC2_INT_ENABLE,
7043                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7044                 else
7045                         WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
7046                                      ring->pipe,
7047                                      GENERIC2_INT_ENABLE,
7048                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7049                 break;
7050         default:
7051                 BUG(); /* kiq only support GENERIC2_INT now */
7052                 break;
7053         }
7054         return 0;
7055 }
7056
7057 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7058                             struct amdgpu_irq_src *source,
7059                             struct amdgpu_iv_entry *entry)
7060 {
7061         u8 me_id, pipe_id, queue_id;
7062         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7063
7064         me_id = (entry->ring_id & 0x0c) >> 2;
7065         pipe_id = (entry->ring_id & 0x03) >> 0;
7066         queue_id = (entry->ring_id & 0x70) >> 4;
7067         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7068                    me_id, pipe_id, queue_id);
7069
7070         amdgpu_fence_process(ring);
7071         return 0;
7072 }
7073
/* IP-block level callbacks: life-cycle (init/fini/suspend/resume),
 * idle/reset handling, and clock/power gating control for GFX v8.0. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
        .late_init = gfx_v8_0_late_init,
        .sw_init = gfx_v8_0_sw_init,
        .sw_fini = gfx_v8_0_sw_fini,
        .hw_init = gfx_v8_0_hw_init,
        .hw_fini = gfx_v8_0_hw_fini,
        .suspend = gfx_v8_0_suspend,
        .resume = gfx_v8_0_resume,
        .is_idle = gfx_v8_0_is_idle,
        .wait_for_idle = gfx_v8_0_wait_for_idle,
        .check_soft_reset = gfx_v8_0_check_soft_reset,
        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
        .soft_reset = gfx_v8_0_soft_reset,
        .post_soft_reset = gfx_v8_0_post_soft_reset,
        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
        .set_powergating_state = gfx_v8_0_set_powergating_state,
        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
7094
/* Ring callbacks for the GFX (graphics) ring.  emit_frame_size is the
 * worst-case dword count of all per-frame packets, itemized below. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
        .type = AMDGPU_RING_TYPE_GFX,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
        .emit_frame_size = /* maximum 215dw if count 16 IBs in */
                5 +  /* COND_EXEC */
                7 +  /* PIPELINE_SYNC */
                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
                8 +  /* FENCE for VM_FLUSH */
                20 + /* GDS switch */
                4 + /* double SWITCH_BUFFER,
                       the first COND_EXEC jump to the place just
                           prior to this double SWITCH_BUFFER  */
                5 + /* COND_EXEC */
                7 +      /*     HDP_flush */
                4 +      /*     VGT_flush */
                14 + /* CE_META */
                31 + /* DE_META */
                3 + /* CNTX_CTRL */
                5 + /* HDP_INVL */
                8 + 8 + /* FENCE x2 */
                2, /* SWITCH_BUFFER */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_switch_buffer = gfx_v8_ring_emit_sb,
        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
        .soft_recovery = gfx_v8_0_ring_soft_recovery,
};
7139
/* Ring callbacks for the MEC compute rings; shares most emit helpers
 * with the gfx ring but uses compute wptr/fence variants. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
        .type = AMDGPU_RING_TYPE_COMPUTE,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .set_priority = gfx_v8_0_ring_set_priority_compute,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7169
/* Ring callbacks for the kernel interface queue (KIQ): no VM flush or
 * GDS switch hooks; adds rreg/wreg emission for register access. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v8_0_ring_emit_rreg,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7195
7196 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7197 {
7198         int i;
7199
7200         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7201
7202         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7203                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7204
7205         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7206                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7207 }
7208
/* Interrupt source tables: each pairs a .set callback (enable/disable
 * the source) with a .process handler invoked from the IH ring. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
        .set = gfx_v8_0_set_eop_interrupt_state,
        .process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
        .set = gfx_v8_0_set_priv_reg_fault_state,
        .process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
        .set = gfx_v8_0_set_priv_inst_fault_state,
        .process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
        .set = gfx_v8_0_kiq_set_interrupt_state,
        .process = gfx_v8_0_kiq_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
        .set = gfx_v8_0_set_cp_ecc_int_state,
        .process = gfx_v8_0_cp_ecc_error_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
        .set = gfx_v8_0_set_sq_int_state,
        .process = gfx_v8_0_sq_irq,
};
7238
/* Register all GFX interrupt sources (type counts and callback tables)
 * on the device.  Called once during IP setup. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
        /* One EOP type per CP ring (gfx + every compute pipe/queue). */
        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

        adev->gfx.priv_reg_irq.num_types = 1;
        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

        adev->gfx.priv_inst_irq.num_types = 1;
        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

        adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
        adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;

        adev->gfx.cp_ecc_error_irq.num_types = 1;
        adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

        adev->gfx.sq_irq.num_types = 1;
        adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
7259
/*
 * Install the RLC (run-list controller) callback table.
 * NOTE(review): the table is named after Iceland; presumably it is
 * shared by all gfx v8 variants handled here -- confirm at definition.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7264
/* Initialize ASIC GDS (global data share) size info and the static
 * partition split between the gfx and compute (CS) clients. */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
        /* Total GDS memory size is reported by the hardware. */
        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
        adev->gds.gws.total_size = 64;
        adev->gds.oa.total_size = 16;

        if (adev->gds.mem.total_size == 64 * 1024) {
                /* 64KB GDS parts get 4KB per-client partitions. */
                adev->gds.mem.gfx_partition_size = 4096;
                adev->gds.mem.cs_partition_size = 4096;

                adev->gds.gws.gfx_partition_size = 4;
                adev->gds.gws.cs_partition_size = 4;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 1;
        } else {
                /* Smaller GDS: 1KB memory partitions, more GWS per client. */
                adev->gds.mem.gfx_partition_size = 1024;
                adev->gds.mem.cs_partition_size = 1024;

                adev->gds.gws.gfx_partition_size = 16;
                adev->gds.gws.cs_partition_size = 16;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 4;
        }
}
7292
7293 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7294                                                  u32 bitmap)
7295 {
7296         u32 data;
7297
7298         if (!bitmap)
7299                 return;
7300
7301         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7302         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7303
7304         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7305 }
7306
7307 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7308 {
7309         u32 data, mask;
7310
7311         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7312                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7313
7314         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7315
7316         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7317 }
7318
/*
 * Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, the always-on (AO) CU mask, and fixed per-CU limits.
 * Iterates every SE/SH via GRBM index selection under grbm_idx_mutex.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
        int i, j, k, counter, active_cu_number = 0;
        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
        unsigned disable_masks[4 * 2];
        u32 ao_cu_num;

        memset(cu_info, 0, sizeof(*cu_info));

        /* APUs keep only 2 CUs per SH always-on; dGPUs keep them all. */
        if (adev->flags & AMD_IS_APU)
                ao_cu_num = 2;
        else
                ao_cu_num = adev->gfx.config.max_cu_per_sh;

        /* Parse the amdgpu.disable_cu module option for up to 4 SEs x 2 SHs. */
        amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
                        /* Select this SE/SH so the CU config regs apply to it. */
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        if (i < 4 && j < 2)
                                gfx_v8_0_set_user_cu_inactive_bitmap(
                                        adev, disable_masks[i * 2 + j]);
                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
                        cu_info->bitmap[i][j] = bitmap;

                        /* Count active CUs; the first ao_cu_num become AO. */
                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
                                if (bitmap & mask) {
                                        if (counter < ao_cu_num)
                                                ao_bitmap |= mask;
                                        counter ++;
                                }
                                mask <<= 1;
                        }
                        active_cu_number += counter;
                        /* ao_cu_mask is 32 bits laid out as 16 per SE and 8
                         * per SH, so only SE0-1/SH0-1 fit -- presumably
                         * intentional for this packing; TODO confirm. */
                        if (i < 2 && j < 2)
                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
                        cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
                }
        }
        /* Restore broadcast (all SE/SH) selection. */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
        cu_info->simd_per_cu = NUM_SIMD_PER_CU;
        cu_info->max_waves_per_simd = 10;
        cu_info->max_scratch_slots_per_cu = 32;
        cu_info->wave_front_size = 64;
        cu_info->lds_size = 64;
}
7374
/* Exported IP block descriptor for GFX v8.0 parts. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 0,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};

/* Exported IP block descriptor for GFX v8.1 parts; shares the same
 * callback table as v8.0. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 1,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
7392
/*
 * Emit a WRITE_DATA packet that zeroes the CE metadata area in the CSA
 * (regular or chained-IB layout depending on virtualization support).
 * cnt_ce is the packet count field: payload dwords plus the 4 header
 * dwords, minus the 2 implied by the PACKET3 encoding.
 */
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
        uint64_t ce_payload_addr;
        int cnt_ce;
        union {
                struct vi_ce_ib_state regular;
                struct vi_ce_ib_state_chained_ib chained;
        } ce_payload = {};

        if (ring->adev->virt.chained_ib_support) {
                ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
                        offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
                cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
        } else {
                ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
                        offsetof(struct vi_gfx_meta_data, ce_payload);
                cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
        }

        /* ENGINE_SEL(2) routes the write to the CE; DST_SEL(8) = memory. */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
                                WRITE_DATA_DST_SEL(8) |
                                WR_CONFIRM) |
                                WRITE_DATA_CACHE_POLICY(0));
        amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
        amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
        amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
7421
/*
 * Emit a WRITE_DATA packet that initializes the DE metadata area in the
 * CSA, recording the GDS backup address (CSA + 4096).  Layout and count
 * computation mirror gfx_v8_0_ring_emit_ce_meta().
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
        uint64_t de_payload_addr, gds_addr, csa_addr;
        int cnt_de;
        union {
                struct vi_de_ib_state regular;
                struct vi_de_ib_state_chained_ib chained;
        } de_payload = {};

        csa_addr = amdgpu_csa_vaddr(ring->adev);
        /* GDS backup lives one page past the CSA base. */
        gds_addr = csa_addr + 4096;
        if (ring->adev->virt.chained_ib_support) {
                de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
                de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
                de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
                cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
        } else {
                de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
                de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
                de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
                cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
        }

        /* ENGINE_SEL(1) routes the write to the DE; DST_SEL(8) = memory. */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
                                WRITE_DATA_DST_SEL(8) |
                                WR_CONFIRM) |
                                WRITE_DATA_CACHE_POLICY(0));
        amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
        amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
        amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}
This page took 0.491455 seconds and 4 git commands to generate.