]> Git Repo - linux.git/blob - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drm/amdgpu: Modify the argument of emit_ib interface
[linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "vi.h"
29 #include "vi_structs.h"
30 #include "vid.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
35
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
38
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
41
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
54 #include "ivsrcid/ivsrcid_vislands30.h"
55
/* GFX v8 exposes a single GFX ring; MEC HPD (per-queue persistent data)
 * area size is 4 KiB. */
56 #define GFX8_NUM_GFX_RINGS     1
57 #define GFX8_MEC_HPD_SIZE 4096
58
/* Known-good ("golden") GB_ADDR_CONFIG values per ASIC. */
59 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
60 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
61 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
62 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
63
/* Shift a field value into its position within the GB_TILE_MODE0 /
 * GB_MACROTILE_MODE0 tiling-configuration register layouts. */
64 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
65 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
66 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
67 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
68 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
69 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
70 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
71 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
72 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
73
/* Bit masks for the RLC_CGTT_MGCG_OVERRIDE register; names indicate a
 * per-block clock-gating override bit — confirm against the register spec. */
74 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
75 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
76 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
77 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
78 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
79 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
80
81 /* BPM SERDES CMD: set (1) or clear (0) a BPM register via SERDES */
82 #define SET_BPM_SERDES_CMD    1
83 #define CLE_BPM_SERDES_CMD    0
84
85 /* BPM Register Address*/
86 enum {
87         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
88         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
89         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
90         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
91         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
92         BPM_REG_FGCG_MAX
93 };
94
/* Entry count of the RLC firmware's "direct register list" format —
 * NOTE(review): presumably fixed by the RLC firmware interface; confirm. */
95 #define RLC_FormatDirectRegListLength        14
/* Firmware images (CE/PFP/ME/MEC/MEC2/RLC) declared per supported VI ASIC so
 * the module loader knows which files this driver may request. The "_2"
 * variants for the Polaris parts are presumably alternate/newer firmware
 * revisions — confirm against the firmware-selection code (not in view). */
97 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
103
104 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
107 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
109
110 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
116
117 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
120 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
122
123 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
129
130 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
131 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
141
142 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
143 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
145 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
153
154 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
155 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
165
166 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
167 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
168 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
169 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
/* GDS register offsets, one row per VMID (0..15):
 * {GDS base, GDS size, GWS, OA} register for that VMID. */
173 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
174 {
175         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
176         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
177         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
178         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
179         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
180         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
181         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
182         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
183         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
184         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
185         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
186         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
187         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
188         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
189         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
190         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
191 };
192
/* Golden register settings for Tonga A11.
 * Each entry is a triple: {register offset, mask of bits to update, value}.
 * NOTE(review): triple layout inferred from the table shape; presumably
 * applied by the common golden-register programming helper during early
 * init — confirm at the (not in view) *_init_golden_registers call site. */
193 static const u32 golden_settings_tonga_a11[] =
194 {
195         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
196         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
197         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
198         mmGB_GPU_ID, 0x0000000f, 0x00000000,
199         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
200         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
201         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
202         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
203         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
204         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
205         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
206         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
207         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
208         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
209         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
210         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
211 };
212
/* Common golden {reg, mask, value} triples for all Tonga variants. */
213 static const u32 tonga_golden_common_all[] =
214 {
215         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
216         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
217         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
218         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
219         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
220         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
221         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
222         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
223 };
224
/* Tonga MGCG/CGCG (medium-grain / coarse-grain clock gating) init sequence:
 * {reg, mask, value} triples. The mmGRBM_GFX_INDEX entries broadcast the
 * writes that follow them to all SE/SH instances (value 0xe0000000). */
225 static const u32 tonga_mgcg_cgcg_init[] =
226 {
227         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
228         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
229         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
230         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
231         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
232         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
233         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
234         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
236         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
237         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
238         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
239         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
240         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
241         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
242         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
245         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
246         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
247         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
249         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
250         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
251         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
252         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
253         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
254         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
255         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
256         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
257         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
/* Per-CU CGTS (shader clock gating) control, CU0..CU7. */
258         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
259         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
260         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
261         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
262         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
263         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
264         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
265         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
266         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
267         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
268         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
269         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
270         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
271         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
272         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
273         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
274         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
275         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
276         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
277         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
278         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
279         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
280         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
281         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
282         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
283         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
284         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
285         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
286         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
287         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
288         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
289         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
290         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
291         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
292         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
293         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
294         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
295         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
296         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
297         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
298         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
299         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
300         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
301         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
302 };
303
/* Golden {reg, mask, value} triples for VegaM A11. */
304 static const u32 golden_settings_vegam_a11[] =
305 {
306         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
307         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
308         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
309         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
310         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
311         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
312         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
313         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
314         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
315         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
316         mmSQ_CONFIG, 0x07f80000, 0x01180000,
317         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
318         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
319         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
320         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
321         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
322         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
323 };
324
/* Common golden {reg, mask, value} triples for VegaM. */
325 static const u32 vegam_golden_common_all[] =
326 {
327         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
328         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
329         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
330         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
331         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
332         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
333 };
334
/* Golden {reg, mask, value} triples for Polaris11 A11. */
335 static const u32 golden_settings_polaris11_a11[] =
336 {
337         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
338         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
339         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
340         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
341         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
342         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
343         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
344         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
345         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
346         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
347         mmSQ_CONFIG, 0x07f80000, 0x01180000,
348         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
349         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
350         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
351         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
352         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
353         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
354 };
355
/* Common golden {reg, mask, value} triples for Polaris11; note the
 * GB_ADDR_CONFIG value matches POLARIS11_GB_ADDR_CONFIG_GOLDEN above. */
356 static const u32 polaris11_golden_common_all[] =
357 {
358         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
359         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
360         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
361         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
362         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
363         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
364 };
365
/* Golden {reg, mask, value} triples for Polaris10 A11. */
366 static const u32 golden_settings_polaris10_a11[] =
367 {
368         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
369         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
370         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
371         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
372         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
373         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
374         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
375         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
376         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
377         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
378         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
379         mmSQ_CONFIG, 0x07f80000, 0x07180000,
380         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
381         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
382         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
383         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
384         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
385 };
386
/* Common golden {reg, mask, value} triples for Polaris10. */
387 static const u32 polaris10_golden_common_all[] =
388 {
389         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
390         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
391         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
392         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
393         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
394         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
395         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
396         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
397 };
398
/* Common golden {reg, mask, value} triples for Fiji. */
399 static const u32 fiji_golden_common_all[] =
400 {
401         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
402         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
403         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
404         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
405         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
406         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
407         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
408         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
409         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
410         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
411 };
412
/* Golden {reg, mask, value} triples for Fiji A10. */
413 static const u32 golden_settings_fiji_a10[] =
414 {
415         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
416         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
417         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
418         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
419         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
420         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
421         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
422         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
423         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
424         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
425         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
426 };
427
/* Fiji MGCG/CGCG clock-gating init sequence: {reg, mask, value} triples.
 * mmGRBM_GFX_INDEX = 0xe0000000 broadcasts subsequent writes to all
 * SE/SH instances. Unlike the Tonga table, no per-CU CGTS entries here. */
428 static const u32 fiji_mgcg_cgcg_init[] =
429 {
430         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
431         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
432         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
433         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
434         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
435         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
436         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
437         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
439         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
441         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
448         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
449         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
452         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
454         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
455         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
456         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
457         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
459         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
460         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
461         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
462         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
463         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
464         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
465 };
466
/* Golden {reg, mask, value} triples for Iceland (Topaz) A11. */
467 static const u32 golden_settings_iceland_a11[] =
468 {
469         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
470         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
471         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
472         mmGB_GPU_ID, 0x0000000f, 0x00000000,
473         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
474         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
475         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
476         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
477         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
478         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
479         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
480         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
481         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
482         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
483         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
484         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
485 };
486
/* Common golden {reg, mask, value} triples for Iceland (Topaz). */
487 static const u32 iceland_golden_common_all[] =
488 {
489         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
490         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
491         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
492         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
493         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
494         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
495         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
496         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
497 };
498
/* Iceland MGCG/CGCG clock-gating init sequence: {reg, mask, value} triples.
 * Broadcast writes via mmGRBM_GFX_INDEX = 0xe0000000, then per-CU CGTS
 * entries for CU0..CU5 (Iceland has fewer CUs than Tonga/CZ tables). */
499 static const u32 iceland_mgcg_cgcg_init[] =
500 {
501         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
502         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
503         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
504         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
505         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
506         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
507         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
508         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
509         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
510         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
511         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
512         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
513         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
514         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
515         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
516         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
519         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
520         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
521         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
523         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
524         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
525         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
526         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
527         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
528         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
529         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
530         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
531         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
532         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
533         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
534         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
535         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
536         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
537         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
538         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
539         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
540         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
541         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
542         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
543         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
544         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
545         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
546         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
547         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
548         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
549         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
550         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
551         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
552         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
553         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
554         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
555         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
556         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
557         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
558         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
559         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
560         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
561         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
562         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
563         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
564         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
565 };
566
/* Golden {reg, mask, value} triples for Carrizo A11. */
567 static const u32 cz_golden_settings_a11[] =
568 {
569         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
570         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
571         mmGB_GPU_ID, 0x0000000f, 0x00000000,
572         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
573         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
574         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
575         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
576         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
577         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
578         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
579         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
580         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
581 };
582
/* Common golden {reg, mask, value} triples for Carrizo. */
583 static const u32 cz_golden_common_all[] =
584 {
585         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
586         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
587         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
588         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
589         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
590         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
591         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
592         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
593 };
594
595 static const u32 cz_mgcg_cgcg_init[] =
596 {
597         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
598         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
599         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
600         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
601         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
602         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
606         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
608         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
615         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
616         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
617         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
619         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
620         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
621         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
622         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
623         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
624         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
625         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
626         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
627         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
628         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
629         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
630         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
631         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
632         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
633         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
634         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
635         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
636         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
637         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
638         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
639         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
640         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
641         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
642         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
643         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
644         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
645         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
646         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
647         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
648         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
649         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
650         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
651         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
652         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
653         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
654         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
655         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
656         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
657         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
658         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
659         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
660         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
661         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
662         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
663         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
664         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
665         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
666         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
667         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
668         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
669         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
670         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
671         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
672 };
673
/*
 * Stoney per-revision "golden" register settings: flat triples of
 * (register offset, mask, value) consumed by
 * amdgpu_device_program_register_sequence().  Values come from the
 * hardware team; do not reorder or hand-tune.
 */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
687
/*
 * Stoney common "golden" settings applied to all revisions: (register,
 * mask, value) triples for amdgpu_device_program_register_sequence().
 * Includes GRBM broadcast index, raster config and the SPI CU resource
 * reservation registers.
 */
static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
699
/*
 * Stoney medium-grain / coarse-grain clockgating init sequence:
 * (register, mask, value) triples for
 * amdgpu_device_program_register_sequence().  Configures RLC CGCG/CGLS,
 * CP/RLC memory sleep and CGTS SM control.
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
708
709
/*
 * Human-readable descriptions of the SQ EDC (error detection and
 * correction) error sources, used when reporting SQ ECC events.
 * NOTE(review): presumably indexed by the SQ_EDC_INFO SOURCE field
 * (the strings carry the SQ_EDC_INFO_SOURCE_* enum names) -- confirm
 * against the interrupt handler that consumes this table.
 */
static const char * const sq_edc_source_names[] = {
        "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
        "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
        "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
        "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
        "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
        "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
        "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
719
720 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
721 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
722 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
723 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
724 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
725 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
726 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
727 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
728
/*
 * gfx_v8_0_init_golden_registers - apply ASIC-specific "golden" register
 * settings during hardware init.
 *
 * For each supported VI-family ASIC, programs up to three register tables
 * (clockgating init, per-revision settings, common settings) via
 * amdgpu_device_program_register_sequence().  Unknown ASIC types are
 * silently left untouched.  Table order within each case is the order the
 * hardware expects; do not reorder the calls.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_mgcg_cgcg_init,
                                                        ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_iceland_a11,
                                                        ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_golden_common_all,
                                                        ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_mgcg_cgcg_init,
                                                        ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_fiji_a10,
                                                        ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_golden_common_all,
                                                        ARRAY_SIZE(fiji_golden_common_all));
                break;

        case CHIP_TONGA:
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_mgcg_cgcg_init,
                                                        ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_tonga_a11,
                                                        ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_golden_common_all,
                                                        ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_VEGAM:
                /* VegaM has no separate mgcg/cgcg init table */
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_vegam_a11,
                                                        ARRAY_SIZE(golden_settings_vegam_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        vegam_golden_common_all,
                                                        ARRAY_SIZE(vegam_golden_common_all));
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                /* Polaris11 and Polaris12 share the same tables */
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris11_a11,
                                                        ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris11_golden_common_all,
                                                        ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris10_a11,
                                                        ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris10_golden_common_all,
                                                        ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
                /*
                 * Board-specific workaround: for these exact Polaris10
                 * boards (matched by PCI revision + subsystem IDs) two
                 * atombios i2c transactions are issued.
                 * NOTE(review): looks like a PMIC/VBIOS fixup -- the
                 * meaning of the magic i2c bytes is not visible here.
                 */
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_device_program_register_sequence(adev,
                                                        cz_mgcg_cgcg_init,
                                                        ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_settings_a11,
                                                        ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_common_all,
                                                        ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_mgcg_cgcg_init,
                                                        ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_settings_a11,
                                                        ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_common_all,
                                                        ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}
825
826 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
827 {
828         adev->gfx.scratch.num_reg = 8;
829         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
830         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
831 }
832
/*
 * gfx_v8_0_ring_test_ring - basic ring-level smoke test.
 *
 * Seeds a scratch register with 0xCAFEDEAD, then submits a three-dword
 * SET_UCONFIG_REG packet on @ring that writes 0xDEADBEEF to the same
 * register, and polls (up to adev->usec_timeout microseconds) until the
 * value appears.
 *
 * Returns 0 on success, -ETIMEDOUT if the write never lands, or the
 * error from scratch/ring allocation.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r)
                return r;

        /* seed the register so a stale 0xDEADBEEF cannot pass the test */
        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r)
                goto error_free_scratch;

        /* have the CP write 0xDEADBEEF to the scratch register */
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        /* busy-wait for the CP to process the packet */
        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }

        if (i >= adev->usec_timeout)
                r = -ETIMEDOUT;

error_free_scratch:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}
869
870 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
871 {
872         struct amdgpu_device *adev = ring->adev;
873         struct amdgpu_ib ib;
874         struct dma_fence *f = NULL;
875
876         unsigned int index;
877         uint64_t gpu_addr;
878         uint32_t tmp;
879         long r;
880
881         r = amdgpu_device_wb_get(adev, &index);
882         if (r)
883                 return r;
884
885         gpu_addr = adev->wb.gpu_addr + (index * 4);
886         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
887         memset(&ib, 0, sizeof(ib));
888         r = amdgpu_ib_get(adev, NULL, 16, &ib);
889         if (r)
890                 goto err1;
891
892         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
893         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
894         ib.ptr[2] = lower_32_bits(gpu_addr);
895         ib.ptr[3] = upper_32_bits(gpu_addr);
896         ib.ptr[4] = 0xDEADBEEF;
897         ib.length_dw = 5;
898
899         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
900         if (r)
901                 goto err2;
902
903         r = dma_fence_wait_timeout(f, false, timeout);
904         if (r == 0) {
905                 r = -ETIMEDOUT;
906                 goto err2;
907         } else if (r < 0) {
908                 goto err2;
909         }
910
911         tmp = adev->wb.wb[index];
912         if (tmp == 0xDEADBEEF)
913                 r = 0;
914         else
915                 r = -EINVAL;
916
917 err2:
918         amdgpu_ib_free(adev, &ib, NULL);
919         dma_fence_put(f);
920 err1:
921         amdgpu_device_wb_free(adev, index);
922         return r;
923 }
924
925
926 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
927 {
928         release_firmware(adev->gfx.pfp_fw);
929         adev->gfx.pfp_fw = NULL;
930         release_firmware(adev->gfx.me_fw);
931         adev->gfx.me_fw = NULL;
932         release_firmware(adev->gfx.ce_fw);
933         adev->gfx.ce_fw = NULL;
934         release_firmware(adev->gfx.rlc_fw);
935         adev->gfx.rlc_fw = NULL;
936         release_firmware(adev->gfx.mec_fw);
937         adev->gfx.mec_fw = NULL;
938         if ((adev->asic_type != CHIP_STONEY) &&
939             (adev->asic_type != CHIP_TOPAZ))
940                 release_firmware(adev->gfx.mec2_fw);
941         adev->gfx.mec2_fw = NULL;
942
943         kfree(adev->gfx.rlc.register_list_format);
944 }
945
946 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
947 {
948         const char *chip_name;
949         char fw_name[30];
950         int err;
951         struct amdgpu_firmware_info *info = NULL;
952         const struct common_firmware_header *header = NULL;
953         const struct gfx_firmware_header_v1_0 *cp_hdr;
954         const struct rlc_firmware_header_v2_0 *rlc_hdr;
955         unsigned int *tmp = NULL, i;
956
957         DRM_DEBUG("\n");
958
959         switch (adev->asic_type) {
960         case CHIP_TOPAZ:
961                 chip_name = "topaz";
962                 break;
963         case CHIP_TONGA:
964                 chip_name = "tonga";
965                 break;
966         case CHIP_CARRIZO:
967                 chip_name = "carrizo";
968                 break;
969         case CHIP_FIJI:
970                 chip_name = "fiji";
971                 break;
972         case CHIP_STONEY:
973                 chip_name = "stoney";
974                 break;
975         case CHIP_POLARIS10:
976                 chip_name = "polaris10";
977                 break;
978         case CHIP_POLARIS11:
979                 chip_name = "polaris11";
980                 break;
981         case CHIP_POLARIS12:
982                 chip_name = "polaris12";
983                 break;
984         case CHIP_VEGAM:
985                 chip_name = "vegam";
986                 break;
987         default:
988                 BUG();
989         }
990
991         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
992                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
993                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
994                 if (err == -ENOENT) {
995                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
996                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
997                 }
998         } else {
999                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1000                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1001         }
1002         if (err)
1003                 goto out;
1004         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1005         if (err)
1006                 goto out;
1007         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1008         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1009         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1010
1011         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1012                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1013                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1014                 if (err == -ENOENT) {
1015                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1016                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1017                 }
1018         } else {
1019                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1020                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1021         }
1022         if (err)
1023                 goto out;
1024         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1025         if (err)
1026                 goto out;
1027         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1028         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1029
1030         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1031
1032         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1033                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1034                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1035                 if (err == -ENOENT) {
1036                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1037                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1038                 }
1039         } else {
1040                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1041                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1042         }
1043         if (err)
1044                 goto out;
1045         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1046         if (err)
1047                 goto out;
1048         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1049         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1050         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1051
1052         /*
1053          * Support for MCBP/Virtualization in combination with chained IBs is
1054          * formal released on feature version #46
1055          */
1056         if (adev->gfx.ce_feature_version >= 46 &&
1057             adev->gfx.pfp_feature_version >= 46) {
1058                 adev->virt.chained_ib_support = true;
1059                 DRM_INFO("Chained IB support enabled!\n");
1060         } else
1061                 adev->virt.chained_ib_support = false;
1062
1063         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1064         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1065         if (err)
1066                 goto out;
1067         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1068         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1069         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1070         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1071
1072         adev->gfx.rlc.save_and_restore_offset =
1073                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1074         adev->gfx.rlc.clear_state_descriptor_offset =
1075                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1076         adev->gfx.rlc.avail_scratch_ram_locations =
1077                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1078         adev->gfx.rlc.reg_restore_list_size =
1079                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1080         adev->gfx.rlc.reg_list_format_start =
1081                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1082         adev->gfx.rlc.reg_list_format_separate_start =
1083                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1084         adev->gfx.rlc.starting_offsets_start =
1085                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1086         adev->gfx.rlc.reg_list_format_size_bytes =
1087                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1088         adev->gfx.rlc.reg_list_size_bytes =
1089                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1090
1091         adev->gfx.rlc.register_list_format =
1092                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1093                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1094
1095         if (!adev->gfx.rlc.register_list_format) {
1096                 err = -ENOMEM;
1097                 goto out;
1098         }
1099
1100         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1101                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1102         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1103                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1104
1105         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1106
1107         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1108                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1109         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1110                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1111
1112         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1113                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1114                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1115                 if (err == -ENOENT) {
1116                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1117                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1118                 }
1119         } else {
1120                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1121                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1122         }
1123         if (err)
1124                 goto out;
1125         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1126         if (err)
1127                 goto out;
1128         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1129         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1130         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1131
1132         if ((adev->asic_type != CHIP_STONEY) &&
1133             (adev->asic_type != CHIP_TOPAZ)) {
1134                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1135                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1136                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1137                         if (err == -ENOENT) {
1138                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1139                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1140                         }
1141                 } else {
1142                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1143                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1144                 }
1145                 if (!err) {
1146                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1147                         if (err)
1148                                 goto out;
1149                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1150                                 adev->gfx.mec2_fw->data;
1151                         adev->gfx.mec2_fw_version =
1152                                 le32_to_cpu(cp_hdr->header.ucode_version);
1153                         adev->gfx.mec2_feature_version =
1154                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1155                 } else {
1156                         err = 0;
1157                         adev->gfx.mec2_fw = NULL;
1158                 }
1159         }
1160
1161         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1162         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1163         info->fw = adev->gfx.pfp_fw;
1164         header = (const struct common_firmware_header *)info->fw->data;
1165         adev->firmware.fw_size +=
1166                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1167
1168         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1169         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1170         info->fw = adev->gfx.me_fw;
1171         header = (const struct common_firmware_header *)info->fw->data;
1172         adev->firmware.fw_size +=
1173                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1174
1175         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1176         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1177         info->fw = adev->gfx.ce_fw;
1178         header = (const struct common_firmware_header *)info->fw->data;
1179         adev->firmware.fw_size +=
1180                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1181
1182         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1183         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1184         info->fw = adev->gfx.rlc_fw;
1185         header = (const struct common_firmware_header *)info->fw->data;
1186         adev->firmware.fw_size +=
1187                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1188
1189         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1190         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1191         info->fw = adev->gfx.mec_fw;
1192         header = (const struct common_firmware_header *)info->fw->data;
1193         adev->firmware.fw_size +=
1194                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1195
1196         /* we need account JT in */
1197         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1198         adev->firmware.fw_size +=
1199                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1200
1201         if (amdgpu_sriov_vf(adev)) {
1202                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1203                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1204                 info->fw = adev->gfx.mec_fw;
1205                 adev->firmware.fw_size +=
1206                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1207         }
1208
1209         if (adev->gfx.mec2_fw) {
1210                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1211                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1212                 info->fw = adev->gfx.mec2_fw;
1213                 header = (const struct common_firmware_header *)info->fw->data;
1214                 adev->firmware.fw_size +=
1215                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1216         }
1217
1218 out:
1219         if (err) {
1220                 dev_err(adev->dev,
1221                         "gfx8: Failed to load firmware \"%s\"\n",
1222                         fw_name);
1223                 release_firmware(adev->gfx.pfp_fw);
1224                 adev->gfx.pfp_fw = NULL;
1225                 release_firmware(adev->gfx.me_fw);
1226                 adev->gfx.me_fw = NULL;
1227                 release_firmware(adev->gfx.ce_fw);
1228                 adev->gfx.ce_fw = NULL;
1229                 release_firmware(adev->gfx.rlc_fw);
1230                 adev->gfx.rlc_fw = NULL;
1231                 release_firmware(adev->gfx.mec_fw);
1232                 adev->gfx.mec_fw = NULL;
1233                 release_firmware(adev->gfx.mec2_fw);
1234                 adev->gfx.mec2_fw = NULL;
1235         }
1236         return err;
1237 }
1238
/*
 * gfx_v8_0_get_csb_buffer - build the RLC clear-state buffer (CSB).
 *
 * Emits a little-endian PM4 stream into @buffer: PREAMBLE begin,
 * CONTEXT_CONTROL, every SECT_CONTEXT extent from the ASIC's clear-state
 * data, the PA_SC raster-config register pair, PREAMBLE end and a final
 * CLEAR_STATE packet.  Silently returns if the clear-state data or the
 * buffer is missing.  No bounds checking is done here; the caller is
 * expected to have sized @buffer appropriately (presumably via
 * gfx_v8_0_get_csb_size() -- confirm at the call site).
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
                                    volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        /* copy every SECT_CONTEXT extent; other section ids abort the build */
        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                return;
                        }
                }
        }

        /* program the raster config pair from the probed RB configuration */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
                        PACKET3_SET_CONTEXT_REG_START);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}
1285
1286 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1287 {
1288         const __le32 *fw_data;
1289         volatile u32 *dst_ptr;
1290         int me, i, max_me = 4;
1291         u32 bo_offset = 0;
1292         u32 table_offset, table_size;
1293
1294         if (adev->asic_type == CHIP_CARRIZO)
1295                 max_me = 5;
1296
1297         /* write the cp table buffer */
1298         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1299         for (me = 0; me < max_me; me++) {
1300                 if (me == 0) {
1301                         const struct gfx_firmware_header_v1_0 *hdr =
1302                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1303                         fw_data = (const __le32 *)
1304                                 (adev->gfx.ce_fw->data +
1305                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1306                         table_offset = le32_to_cpu(hdr->jt_offset);
1307                         table_size = le32_to_cpu(hdr->jt_size);
1308                 } else if (me == 1) {
1309                         const struct gfx_firmware_header_v1_0 *hdr =
1310                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1311                         fw_data = (const __le32 *)
1312                                 (adev->gfx.pfp_fw->data +
1313                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1314                         table_offset = le32_to_cpu(hdr->jt_offset);
1315                         table_size = le32_to_cpu(hdr->jt_size);
1316                 } else if (me == 2) {
1317                         const struct gfx_firmware_header_v1_0 *hdr =
1318                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1319                         fw_data = (const __le32 *)
1320                                 (adev->gfx.me_fw->data +
1321                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1322                         table_offset = le32_to_cpu(hdr->jt_offset);
1323                         table_size = le32_to_cpu(hdr->jt_size);
1324                 } else if (me == 3) {
1325                         const struct gfx_firmware_header_v1_0 *hdr =
1326                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1327                         fw_data = (const __le32 *)
1328                                 (adev->gfx.mec_fw->data +
1329                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1330                         table_offset = le32_to_cpu(hdr->jt_offset);
1331                         table_size = le32_to_cpu(hdr->jt_size);
1332                 } else  if (me == 4) {
1333                         const struct gfx_firmware_header_v1_0 *hdr =
1334                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1335                         fw_data = (const __le32 *)
1336                                 (adev->gfx.mec2_fw->data +
1337                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1338                         table_offset = le32_to_cpu(hdr->jt_offset);
1339                         table_size = le32_to_cpu(hdr->jt_size);
1340                 }
1341
1342                 for (i = 0; i < table_size; i ++) {
1343                         dst_ptr[bo_offset + i] =
1344                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1345                 }
1346
1347                 bo_offset += table_size;
1348         }
1349 }
1350
/*
 * gfx_v8_0_rlc_fini - free the RLC buffer objects
 * @adev: amdgpu device handle
 *
 * Releases the clear-state BO and the CP jump-table BO allocated by
 * gfx_v8_0_rlc_init.  Safe to call on a partially initialized RLC;
 * amdgpu_bo_free_kernel() tolerates NULL objects.
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1356
1357 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1358 {
1359         volatile u32 *dst_ptr;
1360         u32 dws;
1361         const struct cs_section_def *cs_data;
1362         int r;
1363
1364         adev->gfx.rlc.cs_data = vi_cs_data;
1365
1366         cs_data = adev->gfx.rlc.cs_data;
1367
1368         if (cs_data) {
1369                 /* clear state block */
1370                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1371
1372                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1373                                               AMDGPU_GEM_DOMAIN_VRAM,
1374                                               &adev->gfx.rlc.clear_state_obj,
1375                                               &adev->gfx.rlc.clear_state_gpu_addr,
1376                                               (void **)&adev->gfx.rlc.cs_ptr);
1377                 if (r) {
1378                         dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1379                         gfx_v8_0_rlc_fini(adev);
1380                         return r;
1381                 }
1382
1383                 /* set up the cs buffer */
1384                 dst_ptr = adev->gfx.rlc.cs_ptr;
1385                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1386                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1387                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1388         }
1389
1390         if ((adev->asic_type == CHIP_CARRIZO) ||
1391             (adev->asic_type == CHIP_STONEY)) {
1392                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1393                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1394                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1395                                               &adev->gfx.rlc.cp_table_obj,
1396                                               &adev->gfx.rlc.cp_table_gpu_addr,
1397                                               (void **)&adev->gfx.rlc.cp_table_ptr);
1398                 if (r) {
1399                         dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1400                         return r;
1401                 }
1402
1403                 cz_init_cp_jump_table(adev);
1404
1405                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1406                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1407         }
1408
1409         return 0;
1410 }
1411
/*
 * gfx_v8_0_mec_fini - free the MEC HPD EOP buffer object
 * @adev: amdgpu device handle
 *
 * Counterpart of gfx_v8_0_mec_init; amdgpu_bo_free_kernel() is NULL-safe.
 */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1416
1417 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1418 {
1419         int r;
1420         u32 *hpd;
1421         size_t mec_hpd_size;
1422
1423         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1424
1425         /* take ownership of the relevant compute queues */
1426         amdgpu_gfx_compute_queue_acquire(adev);
1427
1428         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1429
1430         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1431                                       AMDGPU_GEM_DOMAIN_VRAM,
1432                                       &adev->gfx.mec.hpd_eop_obj,
1433                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1434                                       (void **)&hpd);
1435         if (r) {
1436                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1437                 return r;
1438         }
1439
1440         memset(hpd, 0, mec_hpd_size);
1441
1442         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1443         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1444
1445         return 0;
1446 }
1447
/*
 * Raw GCN shader words dispatched by gfx_v8_0_do_edc_gpr_workarounds to
 * initialize the VGPR file (EDC workaround, Carrizo only).
 * NOTE(review): looks like a run of v_mov_b32 writes followed by
 * s_barrier/s_endpgm - confirm against the GCN3 ISA before editing.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1484
/*
 * Raw GCN shader words dispatched (twice, with different CU masks) by
 * gfx_v8_0_do_edc_gpr_workarounds to initialize the SGPR file.
 * NOTE(review): appears to be s_mov_b32 writes plus end-of-program
 * words - confirm against the GCN3 ISA before editing.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1509
/*
 * (register, value) pairs written via PACKET3_SET_SH_REG before the
 * VGPR-init dispatch in gfx_v8_0_do_edc_gpr_workarounds; consumed two
 * entries at a time.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1530
/*
 * (register, value) pairs for the first SGPR-init dispatch; differs from
 * sgpr2_init_regs only in the static thread management mask (0x0f: lower
 * CU group).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1551
/*
 * (register, value) pairs for the second SGPR-init dispatch; identical to
 * sgpr1_init_regs except for the static thread management mask (0xf0:
 * upper CU group).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1572
/*
 * EDC SEC/DED error counter registers; gfx_v8_0_do_edc_gpr_workarounds
 * reads each once after the init dispatches to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1601
1602 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1603 {
1604         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1605         struct amdgpu_ib ib;
1606         struct dma_fence *f = NULL;
1607         int r, i;
1608         u32 tmp;
1609         unsigned total_size, vgpr_offset, sgpr_offset;
1610         u64 gpu_addr;
1611
1612         /* only supported on CZ */
1613         if (adev->asic_type != CHIP_CARRIZO)
1614                 return 0;
1615
1616         /* bail if the compute ring is not ready */
1617         if (!ring->sched.ready)
1618                 return 0;
1619
1620         tmp = RREG32(mmGB_EDC_MODE);
1621         WREG32(mmGB_EDC_MODE, 0);
1622
1623         total_size =
1624                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1625         total_size +=
1626                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1627         total_size +=
1628                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1629         total_size = ALIGN(total_size, 256);
1630         vgpr_offset = total_size;
1631         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1632         sgpr_offset = total_size;
1633         total_size += sizeof(sgpr_init_compute_shader);
1634
1635         /* allocate an indirect buffer to put the commands in */
1636         memset(&ib, 0, sizeof(ib));
1637         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1638         if (r) {
1639                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1640                 return r;
1641         }
1642
1643         /* load the compute shaders */
1644         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1645                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1646
1647         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1648                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1649
1650         /* init the ib length to 0 */
1651         ib.length_dw = 0;
1652
1653         /* VGPR */
1654         /* write the register state for the compute dispatch */
1655         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1656                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1657                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1658                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1659         }
1660         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1661         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1662         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1663         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1664         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1665         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1666
1667         /* write dispatch packet */
1668         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1669         ib.ptr[ib.length_dw++] = 8; /* x */
1670         ib.ptr[ib.length_dw++] = 1; /* y */
1671         ib.ptr[ib.length_dw++] = 1; /* z */
1672         ib.ptr[ib.length_dw++] =
1673                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1674
1675         /* write CS partial flush packet */
1676         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1677         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1678
1679         /* SGPR1 */
1680         /* write the register state for the compute dispatch */
1681         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1682                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1683                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1684                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1685         }
1686         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1687         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1688         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1689         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1690         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1691         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1692
1693         /* write dispatch packet */
1694         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1695         ib.ptr[ib.length_dw++] = 8; /* x */
1696         ib.ptr[ib.length_dw++] = 1; /* y */
1697         ib.ptr[ib.length_dw++] = 1; /* z */
1698         ib.ptr[ib.length_dw++] =
1699                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1700
1701         /* write CS partial flush packet */
1702         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1703         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1704
1705         /* SGPR2 */
1706         /* write the register state for the compute dispatch */
1707         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1708                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1709                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1710                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1711         }
1712         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1713         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1714         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1715         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1716         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1717         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1718
1719         /* write dispatch packet */
1720         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1721         ib.ptr[ib.length_dw++] = 8; /* x */
1722         ib.ptr[ib.length_dw++] = 1; /* y */
1723         ib.ptr[ib.length_dw++] = 1; /* z */
1724         ib.ptr[ib.length_dw++] =
1725                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1726
1727         /* write CS partial flush packet */
1728         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1729         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1730
1731         /* shedule the ib on the ring */
1732         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1733         if (r) {
1734                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1735                 goto fail;
1736         }
1737
1738         /* wait for the GPU to finish processing the IB */
1739         r = dma_fence_wait(f, false);
1740         if (r) {
1741                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1742                 goto fail;
1743         }
1744
1745         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1746         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1747         WREG32(mmGB_EDC_MODE, tmp);
1748
1749         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1750         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1751         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1752
1753
1754         /* read back registers to clear the counters */
1755         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1756                 RREG32(sec_ded_counter_registers[i]);
1757
1758 fail:
1759         amdgpu_ib_free(adev, &ib, NULL);
1760         dma_fence_put(f);
1761
1762         return r;
1763 }
1764
/*
 * gfx_v8_0_gpu_early_init - set per-ASIC gfx configuration limits
 * @adev: amdgpu device handle
 *
 * Fills adev->gfx.config with per-chip shader engine / pipe / CU / cache
 * limits and FIFO sizes, then derives memory row size (from DIMM fuse
 * registers on APUs, from MC_ARB_RAMCFG on dGPUs) and the final
 * GB_ADDR_CONFIG value.  Polaris parts read their limits from the vbios
 * via amdgpu_atombios_get_gfx_info() instead of hard-coding them.
 *
 * Returns 0 on success or the atombios query error code.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	/* per-ASIC hard limits and the golden GB_ADDR_CONFIG baseline */
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* shader engine / pipe / CU limits come from the vbios */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
	case CHIP_VEGAM:
		/* shader engine / pipe / CU limits come from the vbios */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but never used below -
	 * confirm whether the register read itself is required */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: derive row size from the column count fuse */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1968
1969 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1970                                         int mec, int pipe, int queue)
1971 {
1972         int r;
1973         unsigned irq_type;
1974         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1975
1976         ring = &adev->gfx.compute_ring[ring_id];
1977
1978         /* mec0 is me1 */
1979         ring->me = mec + 1;
1980         ring->pipe = pipe;
1981         ring->queue = queue;
1982
1983         ring->ring_obj = NULL;
1984         ring->use_doorbell = true;
1985         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1986         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1987                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1988         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1989
1990         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1991                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1992                 + ring->pipe;
1993
1994         /* type-2 packets are deprecated on MEC, use type-3 instead */
1995         r = amdgpu_ring_init(adev, ring, 1024,
1996                         &adev->gfx.eop_irq, irq_type);
1997         if (r)
1998                 return r;
1999
2000
2001         return 0;
2002 }
2003
2004 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
2005
/*
 * gfx_v8_0_sw_init - software-side init for the gfx v8 IP block
 *
 * @handle: amdgpu_device pointer, passed as void * by the IP block framework
 *
 * Registers the CP/SQ interrupt sources, loads gfx microcode, allocates the
 * RLC, MEC and KIQ buffer objects, and creates the gfx and compute rings.
 * Returns 0 on success, a negative error code on the first failure.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs depends on the ASIC: two on most VI parts,
	 * one on Topaz/Stoney (and as the conservative default) */
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq  */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	/* SQ interrupt handling is deferred to a workqueue item */
	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				/* only create rings for queues the scheduler enabled */
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2149
/*
 * gfx_v8_0_sw_fini - software-side teardown for the gfx v8 IP block
 *
 * @handle: amdgpu_device pointer, passed as void * by the IP block framework
 *
 * Frees GDS/GWS/OA buffer objects, tears down the gfx and compute rings,
 * the MQD/KIQ/MEC/RLC objects and the loaded microcode, roughly in reverse
 * order of gfx_v8_0_sw_init.  Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/* the CP table BO only exists on Carrizo/Stoney */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2183
2184 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2185 {
2186         uint32_t *modearray, *mod2array;
2187         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2188         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2189         u32 reg_offset;
2190
2191         modearray = adev->gfx.config.tile_mode_array;
2192         mod2array = adev->gfx.config.macrotile_mode_array;
2193
2194         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2195                 modearray[reg_offset] = 0;
2196
2197         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2198                 mod2array[reg_offset] = 0;
2199
2200         switch (adev->asic_type) {
2201         case CHIP_TOPAZ:
2202                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2203                                 PIPE_CONFIG(ADDR_SURF_P2) |
2204                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2205                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2206                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2207                                 PIPE_CONFIG(ADDR_SURF_P2) |
2208                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2209                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2210                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2211                                 PIPE_CONFIG(ADDR_SURF_P2) |
2212                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2213                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2214                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2215                                 PIPE_CONFIG(ADDR_SURF_P2) |
2216                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2217                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2218                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219                                 PIPE_CONFIG(ADDR_SURF_P2) |
2220                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2221                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2222                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2223                                 PIPE_CONFIG(ADDR_SURF_P2) |
2224                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2225                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2226                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2227                                 PIPE_CONFIG(ADDR_SURF_P2) |
2228                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2229                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2230                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2231                                 PIPE_CONFIG(ADDR_SURF_P2));
2232                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2233                                 PIPE_CONFIG(ADDR_SURF_P2) |
2234                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2235                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2236                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                  PIPE_CONFIG(ADDR_SURF_P2) |
2238                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2239                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2240                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2241                                  PIPE_CONFIG(ADDR_SURF_P2) |
2242                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2243                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2244                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2245                                  PIPE_CONFIG(ADDR_SURF_P2) |
2246                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2247                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2248                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2249                                  PIPE_CONFIG(ADDR_SURF_P2) |
2250                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2251                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2252                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2253                                  PIPE_CONFIG(ADDR_SURF_P2) |
2254                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2255                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2256                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2257                                  PIPE_CONFIG(ADDR_SURF_P2) |
2258                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2259                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2260                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2261                                  PIPE_CONFIG(ADDR_SURF_P2) |
2262                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2263                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2264                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2265                                  PIPE_CONFIG(ADDR_SURF_P2) |
2266                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2267                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2268                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2269                                  PIPE_CONFIG(ADDR_SURF_P2) |
2270                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2271                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2272                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2273                                  PIPE_CONFIG(ADDR_SURF_P2) |
2274                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2275                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2276                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2277                                  PIPE_CONFIG(ADDR_SURF_P2) |
2278                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2279                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2280                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2281                                  PIPE_CONFIG(ADDR_SURF_P2) |
2282                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2283                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2284                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2285                                  PIPE_CONFIG(ADDR_SURF_P2) |
2286                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2287                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2288                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2289                                  PIPE_CONFIG(ADDR_SURF_P2) |
2290                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2291                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2292                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2293                                  PIPE_CONFIG(ADDR_SURF_P2) |
2294                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2295                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2296                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2297                                  PIPE_CONFIG(ADDR_SURF_P2) |
2298                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2299                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2300                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2301                                  PIPE_CONFIG(ADDR_SURF_P2) |
2302                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2303                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2304
2305                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2306                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2307                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2308                                 NUM_BANKS(ADDR_SURF_8_BANK));
2309                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2310                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2311                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2312                                 NUM_BANKS(ADDR_SURF_8_BANK));
2313                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2314                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2315                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2316                                 NUM_BANKS(ADDR_SURF_8_BANK));
2317                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2318                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2319                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2320                                 NUM_BANKS(ADDR_SURF_8_BANK));
2321                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2323                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2324                                 NUM_BANKS(ADDR_SURF_8_BANK));
2325                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2326                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2327                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2328                                 NUM_BANKS(ADDR_SURF_8_BANK));
2329                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2330                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2331                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2332                                 NUM_BANKS(ADDR_SURF_8_BANK));
2333                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2334                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2335                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2336                                 NUM_BANKS(ADDR_SURF_16_BANK));
2337                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2338                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2339                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2340                                 NUM_BANKS(ADDR_SURF_16_BANK));
2341                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2342                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2343                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2344                                  NUM_BANKS(ADDR_SURF_16_BANK));
2345                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2346                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2347                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2348                                  NUM_BANKS(ADDR_SURF_16_BANK));
2349                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2352                                  NUM_BANKS(ADDR_SURF_16_BANK));
2353                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2354                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2355                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2356                                  NUM_BANKS(ADDR_SURF_16_BANK));
2357                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2359                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2360                                  NUM_BANKS(ADDR_SURF_8_BANK));
2361
2362                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2363                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2364                             reg_offset != 23)
2365                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2366
2367                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2368                         if (reg_offset != 7)
2369                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2370
2371                 break;
2372         case CHIP_FIJI:
2373         case CHIP_VEGAM:
2374                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2377                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2378                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2381                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2382                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2385                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2386                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2389                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2390                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2391                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2393                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2394                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2395                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2397                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2398                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2399                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2401                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2402                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2403                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2404                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2405                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2406                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2407                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2408                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2409                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2411                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2415                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2417                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2420                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2421                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2422                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2424                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2433                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2435                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2439                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2440                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2442                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2443                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2444                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2445                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2446                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2447                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2448                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2449                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2451                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2452                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2453                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2455                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2456                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2457                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2459                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2460                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2461                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2463                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2464                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2465                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2466                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2467                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2468                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2469                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2470                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2471                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2472                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2473                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2474                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2475                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2476                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2477                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2478                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2479                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2480                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2481                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2482                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2483                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2484                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2487                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2488                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2489                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2491                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2492                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2493                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2494                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2495                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2496
2497                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2499                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2500                                 NUM_BANKS(ADDR_SURF_8_BANK));
2501                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2503                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2504                                 NUM_BANKS(ADDR_SURF_8_BANK));
2505                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2508                                 NUM_BANKS(ADDR_SURF_8_BANK));
2509                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2511                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512                                 NUM_BANKS(ADDR_SURF_8_BANK));
2513                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2515                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2516                                 NUM_BANKS(ADDR_SURF_8_BANK));
2517                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2519                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2520                                 NUM_BANKS(ADDR_SURF_8_BANK));
2521                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524                                 NUM_BANKS(ADDR_SURF_8_BANK));
2525                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2527                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2528                                 NUM_BANKS(ADDR_SURF_8_BANK));
2529                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2531                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2532                                 NUM_BANKS(ADDR_SURF_8_BANK));
2533                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2535                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2536                                  NUM_BANKS(ADDR_SURF_8_BANK));
2537                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2539                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2540                                  NUM_BANKS(ADDR_SURF_8_BANK));
2541                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2543                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2544                                  NUM_BANKS(ADDR_SURF_8_BANK));
2545                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2547                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548                                  NUM_BANKS(ADDR_SURF_8_BANK));
2549                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2551                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2552                                  NUM_BANKS(ADDR_SURF_4_BANK));
2553
2554                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2555                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2556
2557                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2558                         if (reg_offset != 7)
2559                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2560
2561                 break;
2562         case CHIP_TONGA:
2563                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2566                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2567                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2568                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2570                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2571                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2574                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2575                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2578                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2579                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2582                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2583                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2584                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2585                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2586                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2587                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2588                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2590                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2591                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2592                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2593                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2594                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2595                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2596                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2597                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2598                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2600                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2601                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2604                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2605                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2606                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2609                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2610                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2611                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2612                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2613                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2614                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2616                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2617                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2620                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2622                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2624                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2625                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2626                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2628                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2629                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2631                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2632                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2633                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2634                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2636                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2637                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2638                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2639                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2640                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2641                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2642                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2644                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2645                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2646                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2647                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2648                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2649                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2650                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2651                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2652                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2653                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2654                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2656                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2657                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2658                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2659                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2660                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2661                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2662                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2663                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2664                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2665                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2666                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2667                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2668                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2669                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2670                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2672                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2673                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2675                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2677                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2678                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2680                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2681                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2682                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2684                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2685
2686                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2688                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2689                                 NUM_BANKS(ADDR_SURF_16_BANK));
2690                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693                                 NUM_BANKS(ADDR_SURF_16_BANK));
2694                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2695                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2696                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2697                                 NUM_BANKS(ADDR_SURF_16_BANK));
2698                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2699                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2700                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2701                                 NUM_BANKS(ADDR_SURF_16_BANK));
2702                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2704                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2705                                 NUM_BANKS(ADDR_SURF_16_BANK));
2706                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2708                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2709                                 NUM_BANKS(ADDR_SURF_16_BANK));
2710                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2712                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2713                                 NUM_BANKS(ADDR_SURF_16_BANK));
2714                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2716                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2717                                 NUM_BANKS(ADDR_SURF_16_BANK));
2718                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2720                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2721                                 NUM_BANKS(ADDR_SURF_16_BANK));
2722                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2724                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2725                                  NUM_BANKS(ADDR_SURF_16_BANK));
2726                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2729                                  NUM_BANKS(ADDR_SURF_16_BANK));
2730                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2732                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2733                                  NUM_BANKS(ADDR_SURF_8_BANK));
2734                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2736                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2737                                  NUM_BANKS(ADDR_SURF_4_BANK));
2738                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2741                                  NUM_BANKS(ADDR_SURF_4_BANK));
2742
2743                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2744                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2745
2746                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2747                         if (reg_offset != 7)
2748                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2749
2750                 break;
2751         case CHIP_POLARIS11:
2752         case CHIP_POLARIS12:
2753                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2755                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2756                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2757                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2760                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2761                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2764                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2765                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2768                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2769                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2772                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2773                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2774                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2776                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2777                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2778                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2780                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2781                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2782                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2784                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2785                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2786                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2787                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2788                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2791                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2792                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2796                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2799                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2800                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2802                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2803                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2804                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2806                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2810                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2812                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2814                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2816                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2818                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2819                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2822                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2823                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2824                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2826                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2827                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2828                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2829                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2830                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2831                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2832                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2834                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2835                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2836                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2837                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2838                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2839                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2840                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2841                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2842                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2843                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2844                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2846                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2847                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2848                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2851                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2852                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2854                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2855                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2856                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2858                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2859                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2862                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2864                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2866                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2868                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2869                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2870                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2871                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2872                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2874                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2875
2876                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2878                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2879                                 NUM_BANKS(ADDR_SURF_16_BANK));
2880
2881                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2882                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2883                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2884                                 NUM_BANKS(ADDR_SURF_16_BANK));
2885
2886                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2887                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2888                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2889                                 NUM_BANKS(ADDR_SURF_16_BANK));
2890
2891                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2892                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2893                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2894                                 NUM_BANKS(ADDR_SURF_16_BANK));
2895
2896                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2897                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2898                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2899                                 NUM_BANKS(ADDR_SURF_16_BANK));
2900
2901                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2902                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2903                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2904                                 NUM_BANKS(ADDR_SURF_16_BANK));
2905
2906                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2907                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2908                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2909                                 NUM_BANKS(ADDR_SURF_16_BANK));
2910
2911                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2912                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2913                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2914                                 NUM_BANKS(ADDR_SURF_16_BANK));
2915
2916                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2917                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2918                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2919                                 NUM_BANKS(ADDR_SURF_16_BANK));
2920
2921                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2922                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2923                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2924                                 NUM_BANKS(ADDR_SURF_16_BANK));
2925
2926                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2928                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929                                 NUM_BANKS(ADDR_SURF_16_BANK));
2930
2931                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2932                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2933                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2934                                 NUM_BANKS(ADDR_SURF_16_BANK));
2935
2936                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2937                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2938                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2939                                 NUM_BANKS(ADDR_SURF_8_BANK));
2940
2941                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2942                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2943                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2944                                 NUM_BANKS(ADDR_SURF_4_BANK));
2945
2946                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2947                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2948
2949                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2950                         if (reg_offset != 7)
2951                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2952
2953                 break;
2954         case CHIP_POLARIS10:
2955                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2956                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2958                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2959                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2960                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2962                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2963                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2966                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2967                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2968                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2969                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2970                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2971                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2972                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2974                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2975                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2976                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2978                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2979                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2980                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2982                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2983                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2984                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2985                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2986                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2987                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2988                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2989                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2990                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2992                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2993                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2994                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2996                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2998                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3000                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3001                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3003                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3004                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3005                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3006                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3008                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3009                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3010                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3012                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3013                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3014                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3016                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3018                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3020                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3021                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3023                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3024                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3025                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3026                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3027                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3028                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3029                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3030                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3031                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3032                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3033                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3034                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3035                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3036                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3037                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3038                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3039                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3040                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3041                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3042                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3043                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3044                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3045                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3046                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3047                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3048                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3049                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3050                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3051                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3052                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3053                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3054                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3055                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3056                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3057                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3058                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3059                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3060                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3061                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3062                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3063                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3064                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3065                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3066                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3068                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3070                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3072                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3073                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3074                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3075                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3076                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3077
3078                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3080                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3081                                 NUM_BANKS(ADDR_SURF_16_BANK));
3082
3083                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3085                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3086                                 NUM_BANKS(ADDR_SURF_16_BANK));
3087
3088                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3090                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3091                                 NUM_BANKS(ADDR_SURF_16_BANK));
3092
3093                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3095                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3096                                 NUM_BANKS(ADDR_SURF_16_BANK));
3097
3098                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3099                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3100                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3101                                 NUM_BANKS(ADDR_SURF_16_BANK));
3102
3103                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3104                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3105                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3106                                 NUM_BANKS(ADDR_SURF_16_BANK));
3107
3108                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3109                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3110                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3111                                 NUM_BANKS(ADDR_SURF_16_BANK));
3112
3113                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3114                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3115                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3116                                 NUM_BANKS(ADDR_SURF_16_BANK));
3117
3118                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3119                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3120                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3121                                 NUM_BANKS(ADDR_SURF_16_BANK));
3122
3123                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3124                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3125                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3126                                 NUM_BANKS(ADDR_SURF_16_BANK));
3127
3128                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3130                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3131                                 NUM_BANKS(ADDR_SURF_16_BANK));
3132
3133                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3134                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3135                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3136                                 NUM_BANKS(ADDR_SURF_8_BANK));
3137
3138                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3139                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3140                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3141                                 NUM_BANKS(ADDR_SURF_4_BANK));
3142
3143                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3145                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3146                                 NUM_BANKS(ADDR_SURF_4_BANK));
3147
3148                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3149                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3150
3151                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3152                         if (reg_offset != 7)
3153                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3154
3155                 break;
3156         case CHIP_STONEY:
3157                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3158                                 PIPE_CONFIG(ADDR_SURF_P2) |
3159                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3160                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3161                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3162                                 PIPE_CONFIG(ADDR_SURF_P2) |
3163                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3164                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3165                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3166                                 PIPE_CONFIG(ADDR_SURF_P2) |
3167                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3168                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3169                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3170                                 PIPE_CONFIG(ADDR_SURF_P2) |
3171                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3172                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3173                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174                                 PIPE_CONFIG(ADDR_SURF_P2) |
3175                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3176                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3177                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3178                                 PIPE_CONFIG(ADDR_SURF_P2) |
3179                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3180                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3181                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3182                                 PIPE_CONFIG(ADDR_SURF_P2) |
3183                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3184                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3185                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3186                                 PIPE_CONFIG(ADDR_SURF_P2));
3187                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3188                                 PIPE_CONFIG(ADDR_SURF_P2) |
3189                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3190                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3191                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3192                                  PIPE_CONFIG(ADDR_SURF_P2) |
3193                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3194                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3195                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3196                                  PIPE_CONFIG(ADDR_SURF_P2) |
3197                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3198                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3199                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3200                                  PIPE_CONFIG(ADDR_SURF_P2) |
3201                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3202                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3203                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204                                  PIPE_CONFIG(ADDR_SURF_P2) |
3205                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3206                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3207                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3208                                  PIPE_CONFIG(ADDR_SURF_P2) |
3209                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3210                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3211                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3212                                  PIPE_CONFIG(ADDR_SURF_P2) |
3213                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3214                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3215                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3216                                  PIPE_CONFIG(ADDR_SURF_P2) |
3217                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3218                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3219                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3220                                  PIPE_CONFIG(ADDR_SURF_P2) |
3221                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3222                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3223                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3224                                  PIPE_CONFIG(ADDR_SURF_P2) |
3225                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3226                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3227                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3228                                  PIPE_CONFIG(ADDR_SURF_P2) |
3229                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3230                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3231                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3232                                  PIPE_CONFIG(ADDR_SURF_P2) |
3233                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3234                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3235                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3236                                  PIPE_CONFIG(ADDR_SURF_P2) |
3237                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3238                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3239                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3240                                  PIPE_CONFIG(ADDR_SURF_P2) |
3241                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3242                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3243                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3244                                  PIPE_CONFIG(ADDR_SURF_P2) |
3245                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3246                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3247                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3248                                  PIPE_CONFIG(ADDR_SURF_P2) |
3249                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3250                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3251                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3252                                  PIPE_CONFIG(ADDR_SURF_P2) |
3253                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3254                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3255                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3256                                  PIPE_CONFIG(ADDR_SURF_P2) |
3257                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3258                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3259
3260                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3261                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3262                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3263                                 NUM_BANKS(ADDR_SURF_8_BANK));
3264                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3265                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3266                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3267                                 NUM_BANKS(ADDR_SURF_8_BANK));
3268                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3269                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3270                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3271                                 NUM_BANKS(ADDR_SURF_8_BANK));
3272                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3273                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3274                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3275                                 NUM_BANKS(ADDR_SURF_8_BANK));
3276                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3278                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3279                                 NUM_BANKS(ADDR_SURF_8_BANK));
3280                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283                                 NUM_BANKS(ADDR_SURF_8_BANK));
3284                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3285                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3286                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3287                                 NUM_BANKS(ADDR_SURF_8_BANK));
3288                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3289                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3290                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3291                                 NUM_BANKS(ADDR_SURF_16_BANK));
3292                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3293                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3294                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3295                                 NUM_BANKS(ADDR_SURF_16_BANK));
3296                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3297                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3298                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3299                                  NUM_BANKS(ADDR_SURF_16_BANK));
3300                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3301                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3302                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303                                  NUM_BANKS(ADDR_SURF_16_BANK));
3304                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3305                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3306                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307                                  NUM_BANKS(ADDR_SURF_16_BANK));
3308                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3309                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3310                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3311                                  NUM_BANKS(ADDR_SURF_16_BANK));
3312                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3314                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3315                                  NUM_BANKS(ADDR_SURF_8_BANK));
3316
3317                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3318                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3319                             reg_offset != 23)
3320                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3321
3322                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3323                         if (reg_offset != 7)
3324                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3325
3326                 break;
3327         default:
3328                 dev_warn(adev->dev,
3329                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3330                          adev->asic_type);
3331
3332         case CHIP_CARRIZO:
3333                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3334                                 PIPE_CONFIG(ADDR_SURF_P2) |
3335                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3336                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3337                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3338                                 PIPE_CONFIG(ADDR_SURF_P2) |
3339                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3340                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3341                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3342                                 PIPE_CONFIG(ADDR_SURF_P2) |
3343                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3344                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3345                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3346                                 PIPE_CONFIG(ADDR_SURF_P2) |
3347                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3348                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3349                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3350                                 PIPE_CONFIG(ADDR_SURF_P2) |
3351                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3352                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3353                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3354                                 PIPE_CONFIG(ADDR_SURF_P2) |
3355                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3356                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3357                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3358                                 PIPE_CONFIG(ADDR_SURF_P2) |
3359                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3360                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3361                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3362                                 PIPE_CONFIG(ADDR_SURF_P2));
3363                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3364                                 PIPE_CONFIG(ADDR_SURF_P2) |
3365                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3366                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3367                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3368                                  PIPE_CONFIG(ADDR_SURF_P2) |
3369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3371                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3372                                  PIPE_CONFIG(ADDR_SURF_P2) |
3373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3375                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3376                                  PIPE_CONFIG(ADDR_SURF_P2) |
3377                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3378                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3379                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3380                                  PIPE_CONFIG(ADDR_SURF_P2) |
3381                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3382                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3383                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3384                                  PIPE_CONFIG(ADDR_SURF_P2) |
3385                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3386                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3387                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3388                                  PIPE_CONFIG(ADDR_SURF_P2) |
3389                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3390                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3391                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3392                                  PIPE_CONFIG(ADDR_SURF_P2) |
3393                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3394                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3395                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3396                                  PIPE_CONFIG(ADDR_SURF_P2) |
3397                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3398                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3399                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3400                                  PIPE_CONFIG(ADDR_SURF_P2) |
3401                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3402                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3403                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3404                                  PIPE_CONFIG(ADDR_SURF_P2) |
3405                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3406                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3407                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3408                                  PIPE_CONFIG(ADDR_SURF_P2) |
3409                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3410                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3411                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3412                                  PIPE_CONFIG(ADDR_SURF_P2) |
3413                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3414                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3415                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3416                                  PIPE_CONFIG(ADDR_SURF_P2) |
3417                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3418                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3419                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3420                                  PIPE_CONFIG(ADDR_SURF_P2) |
3421                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3422                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3423                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3424                                  PIPE_CONFIG(ADDR_SURF_P2) |
3425                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3426                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3427                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3428                                  PIPE_CONFIG(ADDR_SURF_P2) |
3429                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3430                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3431                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3432                                  PIPE_CONFIG(ADDR_SURF_P2) |
3433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3435
3436                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3437                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3438                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3439                                 NUM_BANKS(ADDR_SURF_8_BANK));
3440                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3441                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3442                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3443                                 NUM_BANKS(ADDR_SURF_8_BANK));
3444                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3445                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3446                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3447                                 NUM_BANKS(ADDR_SURF_8_BANK));
3448                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3449                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3450                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3451                                 NUM_BANKS(ADDR_SURF_8_BANK));
3452                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3453                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3454                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3455                                 NUM_BANKS(ADDR_SURF_8_BANK));
3456                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3457                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3458                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3459                                 NUM_BANKS(ADDR_SURF_8_BANK));
3460                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3461                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3462                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3463                                 NUM_BANKS(ADDR_SURF_8_BANK));
3464                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3465                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3466                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3467                                 NUM_BANKS(ADDR_SURF_16_BANK));
3468                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3469                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3470                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3471                                 NUM_BANKS(ADDR_SURF_16_BANK));
3472                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3473                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3474                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3475                                  NUM_BANKS(ADDR_SURF_16_BANK));
3476                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3477                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3478                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3479                                  NUM_BANKS(ADDR_SURF_16_BANK));
3480                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3481                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3482                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3483                                  NUM_BANKS(ADDR_SURF_16_BANK));
3484                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3485                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3486                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3487                                  NUM_BANKS(ADDR_SURF_16_BANK));
3488                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3489                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3490                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3491                                  NUM_BANKS(ADDR_SURF_8_BANK));
3492
3493                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3494                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3495                             reg_offset != 23)
3496                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3497
3498                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3499                         if (reg_offset != 7)
3500                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3501
3502                 break;
3503         }
3504 }
3505
3506 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3507                                   u32 se_num, u32 sh_num, u32 instance)
3508 {
3509         u32 data;
3510
3511         if (instance == 0xffffffff)
3512                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3513         else
3514                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3515
3516         if (se_num == 0xffffffff)
3517                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3518         else
3519                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3520
3521         if (sh_num == 0xffffffff)
3522                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3523         else
3524                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3525
3526         WREG32(mmGRBM_GFX_INDEX, data);
3527 }
3528
/* Select the CP micro engine / pipe / queue that subsequent SRBM-indexed
 * register accesses target; the VMID argument to vi_srbm_select() is
 * fixed at 0 here. */
3529 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3530                                   u32 me, u32 pipe, u32 q)
3531 {
3532         vi_srbm_select(adev, me, pipe, q, 0);
3533 }
3534
3535 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3536 {
3537         u32 data, mask;
3538
3539         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3540                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3541
3542         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3543
3544         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3545                                          adev->gfx.config.max_sh_per_se);
3546
3547         return (~data) & mask;
3548 }
3549
/*
 * OR the hard-coded, per-ASIC PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1
 * field values into *rconf and *rconf1.  Outputs are only ever OR-ed into,
 * so callers must pre-initialize them; an unknown ASIC logs an error and
 * leaves them untouched.
 */
3550 static void
3551 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3552 {
3553         switch (adev->asic_type) {
3554         case CHIP_FIJI:
3555         case CHIP_VEGAM:
3556                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3557                           RB_XSEL2(1) | PKR_MAP(2) |
3558                           PKR_XSEL(1) | PKR_YSEL(1) |
3559                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3560                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3561                            SE_PAIR_YSEL(2);
3562                 break;
3563         case CHIP_TONGA:
3564         case CHIP_POLARIS10:
3565                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3566                           SE_XSEL(1) | SE_YSEL(1);
3567                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3568                            SE_PAIR_YSEL(2);
3569                 break;
3570         case CHIP_TOPAZ:
3571         case CHIP_CARRIZO:
3572                 *rconf |= RB_MAP_PKR0(2);
3573                 *rconf1 |= 0x0;
3574                 break;
3575         case CHIP_POLARIS11:
3576         case CHIP_POLARIS12:
3577                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3578                           SE_XSEL(1) | SE_YSEL(1);
3579                 *rconf1 |= 0x0;
3580                 break;
3581         case CHIP_STONEY:
3582                 /* single-SE part: default (zero) raster config */
3583                 *rconf |= 0x0;
3584                 *rconf1 |= 0x0;
3585                 break;
3586         default:
3587                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3588                 break;
3589         }
3590 }
3590
/*
 * Program per-SE raster configs for parts with harvested (fused-off)
 * render backends.  For every shader engine, remap the SE/PKR/RB map
 * fields of raster_config so that work is steered away from RB groups
 * whose bits are missing from @rb_mask, then write the adjusted value
 * with GRBM_GFX_INDEX pointed at that SE.
 *
 * @raster_config / @raster_config_1: the unharvested baseline values
 * @rb_mask: bitmap of RBs that are actually present/active
 * @num_rb:  number of RB pipes being configured
 */
3591 static void
3592 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3593                                         u32 raster_config, u32 raster_config_1,
3594                                         unsigned rb_mask, unsigned num_rb)
3595 {
3596         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3597         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3598         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3599         unsigned rb_per_se = num_rb / num_se;
3600         unsigned se_mask[4];
3601         unsigned se;
3602
             /* Slice rb_mask into per-SE masks (SE i owns rb_per_se bits). */
3603         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3604         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3605         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3606         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3607
3608         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3609         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3610         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3611
             /* If an entire SE pair is empty, retarget SE_PAIR_MAP to the
              * pair that still has RBs. */
3612         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3613                              (!se_mask[2] && !se_mask[3]))) {
3614                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3615
3616                 if (!se_mask[0] && !se_mask[1]) {
3617                         raster_config_1 |=
3618                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3619                 } else {
3620                         raster_config_1 |=
3621                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3622                 }
3623         }
3624
3625         for (se = 0; se < num_se; se++) {
3626                 unsigned raster_config_se = raster_config;
3627                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3628                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3629                 int idx = (se / 2) * 2;
3630
                     /* One SE of this pair empty: steer SE_MAP to the live one. */
3631                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3632                         raster_config_se &= ~SE_MAP_MASK;
3633
3634                         if (!se_mask[idx]) {
3635                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3636                         } else {
3637                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3638                         }
3639                 }
3640
3641                 pkr0_mask &= rb_mask;
3642                 pkr1_mask &= rb_mask;
                     /* One packer of this SE empty: steer PKR_MAP to the live one. */
3643                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3644                         raster_config_se &= ~PKR_MAP_MASK;
3645
3646                         if (!pkr0_mask) {
3647                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3648                         } else {
3649                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3650                         }
3651                 }
3652
                     /* Within each packer, remap RB_MAP to whichever RB survives. */
3653                 if (rb_per_se >= 2) {
3654                         unsigned rb0_mask = 1 << (se * rb_per_se);
3655                         unsigned rb1_mask = rb0_mask << 1;
3656
3657                         rb0_mask &= rb_mask;
3658                         rb1_mask &= rb_mask;
3659                         if (!rb0_mask || !rb1_mask) {
3660                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3661
3662                                 if (!rb0_mask) {
3663                                         raster_config_se |=
3664                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3665                                 } else {
3666                                         raster_config_se |=
3667                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3668                                 }
3669                         }
3670
3671                         if (rb_per_se > 2) {
3672                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3673                                 rb1_mask = rb0_mask << 1;
3674                                 rb0_mask &= rb_mask;
3675                                 rb1_mask &= rb_mask;
3676                                 if (!rb0_mask || !rb1_mask) {
3677                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3678
3679                                         if (!rb0_mask) {
3680                                                 raster_config_se |=
3681                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3682                                         } else {
3683                                                 raster_config_se |=
3684                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3685                                         }
3686                                 }
3687                         }
3688                 }
3689
3690                 /* GRBM_GFX_INDEX has a different offset on VI */
3691                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3692                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3693                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3694         }
3695
3696         /* GRBM_GFX_INDEX has a different offset on VI */
3697         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3698 }
3699
/*
 * Discover the active render backends, program the raster configuration
 * (using the harvested path when some RBs are fused off), and cache the
 * per-SE/SH RB register values for userspace queries.  All GRBM-indexed
 * accesses are serialized under grbm_idx_mutex.
 */
3700 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3701 {
3702         int i, j;
3703         u32 data;
3704         u32 raster_config = 0, raster_config_1 = 0;
3705         u32 active_rbs = 0;
3706         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3707                                         adev->gfx.config.max_sh_per_se;
3708         unsigned num_rb_pipes;
3709
3710         mutex_lock(&adev->grbm_idx_mutex);
             /* Build a global bitmap of active RBs across every SE/SH. */
3711         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3712                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3713                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3714                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3715                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3716                                                rb_bitmap_width_per_sh);
3717                 }
3718         }
3719         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3720
3721         adev->gfx.config.backend_enable_mask = active_rbs;
3722         adev->gfx.config.num_rbs = hweight32(active_rbs);
3723
3724         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3725                              adev->gfx.config.max_shader_engines, 16);
3726
3727         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3728
             /* Full RB complement: broadcast the plain config.  Otherwise
              * fall back to the per-SE harvested programming path. */
3729         if (!adev->gfx.config.backend_enable_mask ||
3730                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3731                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3732                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3733         } else {
3734                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3735                                                         adev->gfx.config.backend_enable_mask,
3736                                                         num_rb_pipes);
3737         }
3738
3739         /* cache the values for userspace */
3740         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3741                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3742                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3743                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3744                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3745                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3746                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3747                         adev->gfx.config.rb_config[i][j].raster_config =
3748                                 RREG32(mmPA_SC_RASTER_CONFIG);
3749                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3750                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3751                 }
3752         }
3753         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3754         mutex_unlock(&adev->grbm_idx_mutex);
3755 }
3756
3757 /**
3758  * gfx_v8_0_init_compute_vmid - gart enable
3759  *
3760  * @adev: amdgpu_device pointer
3761  *
3762  * Initialize compute vmid sh_mem registers
3763  *
3764  */
3765 #define DEFAULT_SH_MEM_BASES    (0x6000)
3766 #define FIRST_COMPUTE_VMID      (8)
3767 #define LAST_COMPUTE_VMID       (16)
3768 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3769 {
3770         int i;
3771         uint32_t sh_mem_config;
3772         uint32_t sh_mem_bases;
3773
3774         /*
3775          * Configure apertures:
3776          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3777          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3778          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3779          */
3780         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3781
3782         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3783                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3784                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3785                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3786                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3787                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3788
3789         mutex_lock(&adev->srbm_mutex);
3790         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3791                 vi_srbm_select(adev, 0, 0, 0, i);
3792                 /* CP and shaders */
3793                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3794                 WREG32(mmSH_MEM_APE1_BASE, 1);
3795                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3796                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3797         }
3798         vi_srbm_select(adev, 0, 0, 0, 0);
3799         mutex_unlock(&adev->srbm_mutex);
3800 }
3801
3802 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3803 {
3804         switch (adev->asic_type) {
3805         default:
3806                 adev->gfx.config.double_offchip_lds_buf = 1;
3807                 break;
3808         case CHIP_CARRIZO:
3809         case CHIP_STONEY:
3810                 adev->gfx.config.double_offchip_lds_buf = 0;
3811                 break;
3812         }
3813 }
3814
/*
 * One-time golden/constant register initialization for gfx v8: address
 * config, tiling tables, RB setup, CU info, per-VMID SH_MEM apertures
 * (VMID 0 for the kernel, the rest for user VMs), compute VMIDs, and the
 * broadcast PA_SC FIFO sizes / SPI arbitration priorities.
 */
3815 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3816 {
3817         u32 tmp, sh_static_mem_cfg;
3818         int i;
3819
3820         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3821         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3822         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3823         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3824
3825         gfx_v8_0_tiling_mode_table_init(adev);
3826         gfx_v8_0_setup_rb(adev);
3827         gfx_v8_0_get_cu_info(adev);
3828         gfx_v8_0_config_init(adev);
3829
3830         /* XXX SH_MEM regs */
3831         /* where to put LDS, scratch, GPUVM in FSA64 space */
3832         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3833                                    SWIZZLE_ENABLE, 1);
3834         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3835                                    ELEMENT_SIZE, 1);
3836         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3837                                    INDEX_STRIDE, 3);
3838         WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3839
3840         mutex_lock(&adev->srbm_mutex);
3841         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3842                 vi_srbm_select(adev, 0, 0, 0, i);
3843                 /* CP and shaders */
3844                 if (i == 0) {
                             /* VMID 0 (kernel): uncached default mtype, base 0 */
3845                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3846                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3847                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3848                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3849                         WREG32(mmSH_MEM_CONFIG, tmp);
3850                         WREG32(mmSH_MEM_BASES, 0);
3851                 } else {
                             /* Other VMIDs: non-coherent default mtype, shared
                              * aperture base taken from bits 63:48. */
3852                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3853                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3854                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3855                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3856                         WREG32(mmSH_MEM_CONFIG, tmp);
3857                         tmp = adev->gmc.shared_aperture_start >> 48;
3858                         WREG32(mmSH_MEM_BASES, tmp);
3859                 }
3860
3861                 WREG32(mmSH_MEM_APE1_BASE, 1);
3862                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3863         }
3864         vi_srbm_select(adev, 0, 0, 0, 0);
3865         mutex_unlock(&adev->srbm_mutex);
3866
3867         gfx_v8_0_init_compute_vmid(adev);
3868
3869         mutex_lock(&adev->grbm_idx_mutex);
3870         /*
3871          * making sure that the following register writes will be broadcasted
3872          * to all the shaders
3873          */
3874         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3875
3876         WREG32(mmPA_SC_FIFO_SIZE,
3877                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3878                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3879                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3880                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3881                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3882                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3883                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3884                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3885
3886         tmp = RREG32(mmSPI_ARB_PRIORITY);
3887         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3888         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3889         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3890         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3891         WREG32(mmSPI_ARB_PRIORITY, tmp);
3892
3893         mutex_unlock(&adev->grbm_idx_mutex);
3894
3895 }
3896
/*
 * Poll until the RLC serdes masters report idle: first the per-CU master
 * busy bit for every SE/SH (each poll bounded by adev->usec_timeout), then
 * the non-CU masters (SE/GC/TC0/TC1) globally.  On a per-SE/SH timeout the
 * broadcast selection is restored, a message is logged, and we bail out
 * early; the final non-CU poll simply stops after the timeout.
 */
3897 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3898 {
3899         u32 i, j, k;
3900         u32 mask;
3901
3902         mutex_lock(&adev->grbm_idx_mutex);
3903         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3904                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3905                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3906                         for (k = 0; k < adev->usec_timeout; k++) {
3907                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3908                                         break;
3909                                 udelay(1);
3910                         }
3911                         if (k == adev->usec_timeout) {
                                     /* restore broadcast before unlocking/returning */
3912                                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3913                                                       0xffffffff, 0xffffffff);
3914                                 mutex_unlock(&adev->grbm_idx_mutex);
3915                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3916                                          i, j);
3917                                 return;
3918                         }
3919                 }
3920         }
3921         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3922         mutex_unlock(&adev->grbm_idx_mutex);
3923
3924         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3925                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3926                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3927                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3928         for (k = 0; k < adev->usec_timeout; k++) {
3929                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3930                         break;
3931                 udelay(1);
3932         }
3933 }
3934
3935 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3936                                                bool enable)
3937 {
3938         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3939
3940         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3941         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3942         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3943         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3944
3945         WREG32(mmCP_INT_CNTL_RING0, tmp);
3946 }
3947
/*
 * Program the RLC clear-state indirect buffer (CSIB): GPU address split
 * into HI/LO halves (LO masked to a 4-byte-aligned value) plus its size.
 */
3948 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3949 {
3950         /* csib */
3951         WREG32(mmRLC_CSIB_ADDR_HI,
3952                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3953         WREG32(mmRLC_CSIB_ADDR_LO,
3954                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3955         WREG32(mmRLC_CSIB_LENGTH,
3956                         adev->gfx.rlc.clear_state_size);
3957 }
3958
/*
 * Walk the RLC indirect register list starting at @ind_offset and, in
 * place, replace each entry's register-index word with its position in
 * @unique_indices (building that de-duplicated table as it goes).  The
 * start offset of each entry is recorded in @ind_start_offsets; entries
 * are terminated by a 0xFFFFFFFF marker word.  *indices_count and
 * *offset_count are in/out running totals; overflowing @max_indices or
 * @max_offset is a hard BUG_ON.
 *
 * NOTE(review): the += 2 skip suggests each record is three words
 * (two data words then the register index) — confirm against the RLC
 * firmware list format before relying on this.
 */
3959 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3960                                 int ind_offset,
3961                                 int list_size,
3962                                 int *unique_indices,
3963                                 int *indices_count,
3964                                 int max_indices,
3965                                 int *ind_start_offsets,
3966                                 int *offset_count,
3967                                 int max_offset)
3968 {
3969         int indices;
3970         bool new_entry = true;
3971
3972         for (; ind_offset < list_size; ind_offset++) {
3973
3974                 if (new_entry) {
3975                         new_entry = false;
3976                         ind_start_offsets[*offset_count] = ind_offset;
3977                         *offset_count = *offset_count + 1;
3978                         BUG_ON(*offset_count >= max_offset);
3979                 }
3980
                     /* end-of-entry marker: next word begins a new entry */
3981                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3982                         new_entry = true;
3983                         continue;
3984                 }
3985
3986                 ind_offset += 2;
3987
3988                 /* look for the matching index */
3989                 for (indices = 0;
3990                         indices < *indices_count;
3991                         indices++) {
3992                         if (unique_indices[indices] ==
3993                                 register_list_format[ind_offset])
3994                                 break;
3995                 }
3996
                     /* not seen before: append to the unique table */
3997                 if (indices >= *indices_count) {
3998                         unique_indices[*indices_count] =
3999                                 register_list_format[ind_offset];
4000                         indices = *indices_count;
4001                         *indices_count = *indices_count + 1;
4002                         BUG_ON(*indices_count >= max_indices);
4003                 }
4004
4005                 register_list_format[ind_offset] = indices;
4006         }
4007 }
4008
4009 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4010 {
4011         int i, temp, data;
4012         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4013         int indices_count = 0;
4014         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4015         int offset_count = 0;
4016
4017         int list_size;
4018         unsigned int *register_list_format =
4019                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4020         if (!register_list_format)
4021                 return -ENOMEM;
4022         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4023                         adev->gfx.rlc.reg_list_format_size_bytes);
4024
4025         gfx_v8_0_parse_ind_reg_list(register_list_format,
4026                                 RLC_FormatDirectRegListLength,
4027                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4028                                 unique_indices,
4029                                 &indices_count,
4030                                 ARRAY_SIZE(unique_indices),
4031                                 indirect_start_offsets,
4032                                 &offset_count,
4033                                 ARRAY_SIZE(indirect_start_offsets));
4034
4035         /* save and restore list */
4036         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4037
4038         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4039         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4040                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4041
4042         /* indirect list */
4043         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4044         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4045                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4046
4047         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4048         list_size = list_size >> 1;
4049         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4050         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4051
4052         /* starting offsets starts */
4053         WREG32(mmRLC_GPM_SCRATCH_ADDR,
4054                 adev->gfx.rlc.starting_offsets_start);
4055         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4056                 WREG32(mmRLC_GPM_SCRATCH_DATA,
4057                                 indirect_start_offsets[i]);
4058
4059         /* unique indices */
4060         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4061         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4062         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4063                 if (unique_indices[i] != 0) {
4064                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4065                         WREG32(data + i, unique_indices[i] >> 20);
4066                 }
4067         }
4068         kfree(register_list_format);
4069
4070         return 0;
4071 }
4072
/* Turn on the RLC save/restore machine (SRM), which saves and restores
 * register state across power-gating transitions using the lists
 * programmed by gfx_v8_0_init_save_restore_list().
 */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4077
/* Program the power-gating timing parameters: write-pointer idle poll
 * count, the RLC power up/down delays, and the auto-PG idle threshold.
 * The delay values are fixed per-ASIC tuning constants.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
        uint32_t data;

        WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

        /* all four PG delay phases use the same 0x10 tuning value */
        data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
        data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
        data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
        data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
        WREG32(mmRLC_PG_DELAY, data);

        WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
        /* GFX-idle time before the RLC auto-saves GRBM registers */
        WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4094
/* Enable/disable SMU clock slow-down while powering up (CZ/ST APUs). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
                                                bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4100
/* Enable/disable SMU clock slow-down while powering down (CZ/ST APUs). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
                                                  bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4106
/* Enable/disable CP power gating.  Note the field is a *disable* bit,
 * so the value is inverted: enable => CP_PG_DISABLE = 0.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4111
4112 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4113 {
4114         if ((adev->asic_type == CHIP_CARRIZO) ||
4115             (adev->asic_type == CHIP_STONEY)) {
4116                 gfx_v8_0_init_csb(adev);
4117                 gfx_v8_0_init_save_restore_list(adev);
4118                 gfx_v8_0_enable_save_restore_machine(adev);
4119                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4120                 gfx_v8_0_init_power_gating(adev);
4121                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4122         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4123                    (adev->asic_type == CHIP_POLARIS12) ||
4124                    (adev->asic_type == CHIP_VEGAM)) {
4125                 gfx_v8_0_init_csb(adev);
4126                 gfx_v8_0_init_save_restore_list(adev);
4127                 gfx_v8_0_enable_save_restore_machine(adev);
4128                 gfx_v8_0_init_power_gating(adev);
4129         }
4130
4131 }
4132
/* Halt the RLC F32 core, then quiesce: mask the GUI idle interrupt and
 * wait for the serdes transactions to drain before callers touch RLC state.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

        gfx_v8_0_enable_gui_idle_interrupt(adev, false);
        gfx_v8_0_wait_for_rlc_serdes(adev);
}
4140
/* Pulse the RLC soft reset: assert, settle, deassert, settle.
 * The 50us delays give the reset time to propagate.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
        udelay(50);

        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
        udelay(50);
}
4149
/* Re-enable the RLC F32 core and, on dGPUs, the GUI idle interrupt. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

        /* carrizo do enable cp interrupt after cp inited */
        if (!(adev->flags & AMD_IS_APU))
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);

        udelay(50);
}
4160
/* Full RLC restart sequence: stop, soft-reset, reprogram power gating,
 * then start again.  Order is fixed by the hardware.  Always returns 0.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
        gfx_v8_0_rlc_stop(adev);
        gfx_v8_0_rlc_reset(adev);
        gfx_v8_0_init_pg(adev);
        gfx_v8_0_rlc_start(adev);

        return 0;
}
4170
4171 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4172 {
4173         int i;
4174         u32 tmp = RREG32(mmCP_ME_CNTL);
4175
4176         if (enable) {
4177                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4178                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4179                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4180         } else {
4181                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4182                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4183                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4184                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4185                         adev->gfx.gfx_ring[i].sched.ready = false;
4186         }
4187         WREG32(mmCP_ME_CNTL, tmp);
4188         udelay(50);
4189 }
4190
4191 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4192 {
4193         u32 count = 0;
4194         const struct cs_section_def *sect = NULL;
4195         const struct cs_extent_def *ext = NULL;
4196
4197         /* begin clear state */
4198         count += 2;
4199         /* context control state */
4200         count += 3;
4201
4202         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4203                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4204                         if (sect->id == SECT_CONTEXT)
4205                                 count += 2 + ext->reg_count;
4206                         else
4207                                 return 0;
4208                 }
4209         }
4210         /* pa_sc_raster_config/pa_sc_raster_config1 */
4211         count += 4;
4212         /* end clear state */
4213         count += 2;
4214         /* clear state */
4215         count += 2;
4216
4217         return count;
4218 }
4219
/*
 * gfx_v8_0_cp_gfx_start - bring up the gfx CP and emit the clear state
 *
 * Programs basic CP configuration, un-halts the gfx micro engines, then
 * submits the clear-state preamble, golden context registers from
 * vi_cs_data, raster config and CE partition setup on gfx ring 0.
 * Packet order must match gfx_v8_0_get_csb_size().
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;
        int r, i;

        /* init the CP */
        WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
        WREG32(mmCP_ENDIAN_SWAP, 0);
        WREG32(mmCP_DEVICE_ID, 1);

        gfx_v8_0_cp_gfx_enable(adev, true);

        /* + 4 dwords for the SET_BASE packet emitted at the end */
        r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* clear state buffer */
        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, 0x80000000);
        amdgpu_ring_write(ring, 0x80000000);

        /* golden context register state */
        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                amdgpu_ring_write(ring,
                                       PACKET3(PACKET3_SET_CONTEXT_REG,
                                               ext->reg_count));
                                amdgpu_ring_write(ring,
                                       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        amdgpu_ring_write(ring, ext->extent[i]);
                        }
                }
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
        amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
        amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        amdgpu_ring_write(ring, 0);

        /* init the CE partitions */
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
        amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
        amdgpu_ring_write(ring, 0x8000);
        amdgpu_ring_write(ring, 0x8000);

        amdgpu_ring_commit(ring);

        return 0;
}
/* Configure the CP gfx ring doorbell: offset/enable when the ring uses a
 * doorbell, plus the doorbell aperture range on dGPUs.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
        u32 tmp;
        /* no gfx doorbells on iceland */
        if (adev->asic_type == CHIP_TOPAZ)
                return;

        tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

        if (ring->use_doorbell) {
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                DOORBELL_OFFSET, ring->doorbell_index);
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                                DOORBELL_HIT, 0);
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                            DOORBELL_EN, 1);
        } else {
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
        }

        WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

        /* the doorbell range registers below are dGPU-only */
        if (adev->flags & AMD_IS_APU)
                return;

        tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
                                        DOORBELL_RANGE_LOWER,
                                        AMDGPU_DOORBELL_GFX_RING0);
        WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

        WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4316
/*
 * gfx_v8_0_cp_gfx_resume - program and start the gfx ring buffer
 *
 * Configures CP_RB0 (size, pointers, writeback addresses, base), sets up
 * the doorbell, emits the clear state via gfx_v8_0_cp_gfx_start() and
 * runs a ring test.  Register write order follows the hardware init
 * sequence and must not be rearranged.
 *
 * Returns the result of the ring test.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        u32 tmp;
        u32 rb_bufsz;
        u64 rb_addr, rptr_addr, wptr_gpu_addr;
        int r;

        /* Set the write pointer delay */
        WREG32(mmCP_RB_WPTR_DELAY, 0);

        /* set the RB to use vmid 0 */
        WREG32(mmCP_RB_VMID, 0);

        /* Set ring buffer size */
        ring = &adev->gfx.gfx_ring[0];
        rb_bufsz = order_base_2(ring->ring_size / 8);
        tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
        WREG32(mmCP_RB0_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
        ring->wptr = 0;
        WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

        /* set the wb address wether it's enabled or not */
        rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
        WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

        wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
        WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
        /* let the pointer reset settle before dropping RPTR_WR_ENA */
        mdelay(1);
        WREG32(mmCP_RB0_CNTL, tmp);

        /* ring base is stored in units of 256 bytes */
        rb_addr = ring->gpu_addr >> 8;
        WREG32(mmCP_RB0_BASE, rb_addr);
        WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

        gfx_v8_0_set_cpg_door_bell(adev, ring);
        /* start the ring */
        amdgpu_ring_clear_ring(ring);
        gfx_v8_0_cp_gfx_start(adev);
        ring->sched.ready = true;
        r = amdgpu_ring_test_helper(ring);

        return r;
}
4372
4373 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4374 {
4375         int i;
4376
4377         if (enable) {
4378                 WREG32(mmCP_MEC_CNTL, 0);
4379         } else {
4380                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4381                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4382                         adev->gfx.compute_ring[i].sched.ready = false;
4383                 adev->gfx.kiq.ring.sched.ready = false;
4384         }
4385         udelay(50);
4386 }
4387
4388 /* KIQ functions */
/* Identify the KIQ queue (me/pipe/queue) to the RLC.  The queue id is
 * written first, then bit 7 is set in a second write — presumably the
 * "queue valid" bit that must be raised after the id is latched;
 * NOTE(review): confirm the two-write requirement against RLC docs.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
        uint32_t tmp;
        struct amdgpu_device *adev = ring->adev;

        /* tell RLC which is KIQ queue */
        tmp = RREG32(mmRLC_CP_SCHEDULERS);
        tmp &= 0xffffff00;
        tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
        WREG32(mmRLC_CP_SCHEDULERS, tmp);
        tmp |= 0x80;
        WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4402
/*
 * gfx_v8_0_kiq_kcq_enable - map all compute queues through the KIQ
 *
 * Builds the queue mask from the MEC queue bitmap, submits a
 * SET_RESOURCES packet followed by one MAP_QUEUES packet per compute
 * ring, then ring-tests the KIQ.  The PM4 dword layout below is
 * mandated by the packet formats and must not be reordered.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
        uint64_t queue_mask = 0;
        int r, i;

        for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
                if (!test_bit(i, adev->gfx.mec.queue_bitmap))
                        continue;

                /* This situation may be hit in the future if a new HW
                 * generation exposes more than 64 queues. If so, the
                 * definition of queue_mask needs updating */
                if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
                        DRM_ERROR("Invalid KCQ enabled: %d\n", i);
                        break;
                }

                queue_mask |= (1ull << i);
        }

        /* 8 dwords per MAP_QUEUES + 8 for SET_RESOURCES */
        r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
        if (r) {
                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
                return r;
        }
        /* set resources */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
        amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
        amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
                uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
                uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

                /* map queues */
                amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
                /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
                amdgpu_ring_write(kiq_ring,
                                  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
                amdgpu_ring_write(kiq_ring,
                                  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
                                  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                                  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
                amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
                amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
        }

        r = amdgpu_ring_test_helper(kiq_ring);
        if (r)
                DRM_ERROR("KCQ enable failed\n");
        return r;
}
4464
/*
 * gfx_v8_0_deactivate_hqd - dequeue the currently selected HQD
 *
 * Assumes the target queue was selected via vi_srbm_select() by the
 * caller.  If the HQD is active, issues a dequeue request of type @req
 * and polls (up to adev->usec_timeout microseconds) for it to go idle,
 * then clears the dequeue request and PQ pointers unconditionally.
 *
 * Returns 0 on success, -ETIMEDOUT if the HQD never deactivated.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
        int i, r = 0;

        if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
                WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
                for (i = 0; i < adev->usec_timeout; i++) {
                        if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
                                break;
                        udelay(1);
                }
                if (i == adev->usec_timeout)
                        r = -ETIMEDOUT;
        }
        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
        WREG32(mmCP_HQD_PQ_RPTR, 0);
        WREG32(mmCP_HQD_PQ_WPTR, 0);

        return r;
}
4485
/*
 * gfx_v8_0_mqd_init - fill in the memory queue descriptor for a ring
 *
 * Populates ring->mqd_ptr with the values the CP needs to run this
 * compute queue: EOP buffer, MQD/HQD base addresses, queue control,
 * writeback addresses, doorbell setup and MTYPE settings, then marks
 * the queue active.  Current register values are read back for several
 * fields so unrelated bits are preserved.  The MQD is committed to the
 * hardware separately by gfx_v8_0_mqd_commit().  Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
        uint32_t tmp;

        mqd->header = 0xC0310800;
        mqd->compute_pipelinestat_enable = 0x00000001;
        mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
        mqd->compute_misc_reserved = 0x00000003;
        /* point the CP at the dynamic CU mask stored in the MQD allocation */
        mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
                                                     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
        mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
                                                     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
        eop_base_addr = ring->eop_gpu_addr >> 8;
        mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
        mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
        tmp = RREG32(mmCP_HQD_EOP_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
                        (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

        mqd->cp_hqd_eop_control = tmp;

        /* enable doorbell? */
        tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
                            CP_HQD_PQ_DOORBELL_CONTROL,
                            DOORBELL_EN,
                            ring->use_doorbell ? 1 : 0);

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* set the pointer to the MQD */
        mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
        mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

        /* set MQD vmid to 0 */
        tmp = RREG32(mmCP_MQD_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
        mqd->cp_mqd_control = tmp;

        /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
        hqd_gpu_addr = ring->gpu_addr >> 8;
        mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
        mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

        /* set up the HQD, this is similar to CP_RB0_CNTL */
        tmp = RREG32(mmCP_HQD_PQ_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
                            (order_base_2(ring->ring_size / 4) - 1));
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                        ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
        mqd->cp_hqd_pq_control = tmp;

        /* set the wb address whether it's enabled or not */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_rptr_report_addr_hi =
                upper_32_bits(wb_gpu_addr) & 0xffff;

        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

        tmp = 0;
        /* enable the doorbell if requested */
        if (ring->use_doorbell) {
                tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                DOORBELL_OFFSET, ring->doorbell_index);

                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_EN, 1);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_SOURCE, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_HIT, 0);
        }

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
        ring->wptr = 0;
        mqd->cp_hqd_pq_wptr = ring->wptr;
        mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

        /* set the vmid for the queue */
        mqd->cp_hqd_vmid = 0;

        tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
        mqd->cp_hqd_persistent_state = tmp;

        /* set MTYPE */
        tmp = RREG32(mmCP_HQD_IB_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ib_control = tmp;

        tmp = RREG32(mmCP_HQD_IQ_TIMER);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
        mqd->cp_hqd_iq_timer = tmp;

        tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ctx_save_control = tmp;

        /* defaults: snapshot the remaining fields from current HW state */
        mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
        mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
        mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
        mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
        mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
        mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
        mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
        mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
        mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
        mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
        mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
        mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
        mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
        mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
        mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

        /* activate the queue */
        mqd->cp_hqd_active = 1;

        return 0;
}
4628
/*
 * gfx_v8_0_mqd_commit - write an MQD's contents into the HQD registers
 *
 * Assumes the target queue was selected via vi_srbm_select() by the
 * caller.  The MQD fields mirror the register file starting at
 * mmCP_MQD_BASE_ADDR, so each register is programmed by indexing into
 * the MQD.  Write order matters: CP_HQD_ACTIVE is written last since it
 * starts the queue, and EOP RPTR/WPTR are skipped on Tonga (errata).
 * Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
                        struct vi_mqd *mqd)
{
        uint32_t mqd_reg;
        uint32_t *mqd_data;

        /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
        mqd_data = &mqd->cp_mqd_base_addr_lo;

        /* disable wptr polling */
        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

        /* program all HQD registers */
        for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
         * This is safe since EOP RPTR==WPTR for any inactive HQD
         * on ASICs that do not support context-save.
         * EOP writes/reads can start anywhere in the ring.
         */
        if (adev->asic_type != CHIP_TONGA) {
                WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
                WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
                WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
        }

        for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* activate the HQD */
        for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        return 0;
}
4665
/*
 * gfx_v8_0_kiq_init_queue - initialize (or restore) the KIQ's MQD
 *
 * On GPU reset the saved MQD backup is restored and recommitted; on
 * first init a fresh MQD is built, committed, and backed up.  The KIQ
 * uses the backup slot past the compute rings (AMDGPU_MAX_COMPUTE_RINGS).
 * All HQD register access is bracketed by srbm_mutex + vi_srbm_select().
 * Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

        gfx_v8_0_kiq_setting(ring);

        if (adev->in_gpu_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);
        } else {
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        }

        return 0;
}
4704
/*
 * gfx_v8_0_kcq_init_queue - initialize a compute queue's MQD
 *
 * First init: build a fresh MQD and back it up.  GPU reset: restore the
 * backup and reset the ring (the KIQ recommits it via MAP_QUEUES, so no
 * gfx_v8_0_mqd_commit() here, unlike the KIQ path).  Resume: just clear
 * the ring.  Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = ring - &adev->gfx.compute_ring[0];

        if (!adev->in_gpu_reset && !adev->in_suspend) {
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
        } else {
                amdgpu_ring_clear_ring(ring);
        }
        return 0;
}
4735
4736 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4737 {
4738         if (adev->asic_type > CHIP_TONGA) {
4739                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4740                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4741         }
4742         /* enable doorbells */
4743         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4744 }
4745
4746 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4747 {
4748         struct amdgpu_ring *ring;
4749         int r;
4750
4751         ring = &adev->gfx.kiq.ring;
4752
4753         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4754         if (unlikely(r != 0))
4755                 return r;
4756
4757         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4758         if (unlikely(r != 0))
4759                 return r;
4760
4761         gfx_v8_0_kiq_init_queue(ring);
4762         amdgpu_bo_kunmap(ring->mqd_obj);
4763         ring->mqd_ptr = NULL;
4764         amdgpu_bo_unreserve(ring->mqd_obj);
4765         ring->sched.ready = true;
4766         return 0;
4767 }
4768
4769 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4770 {
4771         struct amdgpu_ring *ring = NULL;
4772         int r = 0, i;
4773
4774         gfx_v8_0_cp_compute_enable(adev, true);
4775
4776         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4777                 ring = &adev->gfx.compute_ring[i];
4778
4779                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4780                 if (unlikely(r != 0))
4781                         goto done;
4782                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4783                 if (!r) {
4784                         r = gfx_v8_0_kcq_init_queue(ring);
4785                         amdgpu_bo_kunmap(ring->mqd_obj);
4786                         ring->mqd_ptr = NULL;
4787                 }
4788                 amdgpu_bo_unreserve(ring->mqd_obj);
4789                 if (r)
4790                         goto done;
4791         }
4792
4793         gfx_v8_0_set_mec_doorbell_range(adev);
4794
4795         r = gfx_v8_0_kiq_kcq_enable(adev);
4796         if (r)
4797                 goto done;
4798
4799         /* Test KCQs - reversing the order of rings seems to fix ring test failure
4800          * after GPU reset
4801          */
4802         for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) {
4803                 ring = &adev->gfx.compute_ring[i];
4804                 r = amdgpu_ring_test_helper(ring);
4805         }
4806
4807 done:
4808         return r;
4809 }
4810
4811 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4812 {
4813         int r;
4814
4815         if (!(adev->flags & AMD_IS_APU))
4816                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4817
4818         r = gfx_v8_0_kiq_resume(adev);
4819         if (r)
4820                 return r;
4821
4822         r = gfx_v8_0_cp_gfx_resume(adev);
4823         if (r)
4824                 return r;
4825
4826         r = gfx_v8_0_kcq_resume(adev);
4827         if (r)
4828                 return r;
4829         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4830
4831         return 0;
4832 }
4833
4834 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4835 {
4836         gfx_v8_0_cp_gfx_enable(adev, enable);
4837         gfx_v8_0_cp_compute_enable(adev, enable);
4838 }
4839
4840 static int gfx_v8_0_hw_init(void *handle)
4841 {
4842         int r;
4843         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4844
4845         gfx_v8_0_init_golden_registers(adev);
4846         gfx_v8_0_constants_init(adev);
4847
4848         r = gfx_v8_0_rlc_resume(adev);
4849         if (r)
4850                 return r;
4851
4852         r = gfx_v8_0_cp_resume(adev);
4853
4854         return r;
4855 }
4856
4857 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4858 {
4859         int r, i;
4860         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4861
4862         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4863         if (r)
4864                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4865
4866         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4867                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4868
4869                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4870                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4871                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4872                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4873                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4874                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4875                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4876                 amdgpu_ring_write(kiq_ring, 0);
4877                 amdgpu_ring_write(kiq_ring, 0);
4878                 amdgpu_ring_write(kiq_ring, 0);
4879         }
4880         r = amdgpu_ring_test_helper(kiq_ring);
4881         if (r)
4882                 DRM_ERROR("KCQ disable failed\n");
4883
4884         return r;
4885 }
4886
4887 static bool gfx_v8_0_is_idle(void *handle)
4888 {
4889         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4890
4891         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4892                 || RREG32(mmGRBM_STATUS2) != 0x8)
4893                 return false;
4894         else
4895                 return true;
4896 }
4897
4898 static bool gfx_v8_0_rlc_is_idle(void *handle)
4899 {
4900         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4901
4902         if (RREG32(mmGRBM_STATUS2) != 0x8)
4903                 return false;
4904         else
4905                 return true;
4906 }
4907
4908 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4909 {
4910         unsigned int i;
4911         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4912
4913         for (i = 0; i < adev->usec_timeout; i++) {
4914                 if (gfx_v8_0_rlc_is_idle(handle))
4915                         return 0;
4916
4917                 udelay(1);
4918         }
4919         return -ETIMEDOUT;
4920 }
4921
4922 static int gfx_v8_0_wait_for_idle(void *handle)
4923 {
4924         unsigned int i;
4925         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4926
4927         for (i = 0; i < adev->usec_timeout; i++) {
4928                 if (gfx_v8_0_is_idle(handle))
4929                         return 0;
4930
4931                 udelay(1);
4932         }
4933         return -ETIMEDOUT;
4934 }
4935
4936 static int gfx_v8_0_hw_fini(void *handle)
4937 {
4938         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4939
4940         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4941         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4942
4943         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4944
4945         amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4946
4947         /* disable KCQ to avoid CPC touch memory not valid anymore */
4948         gfx_v8_0_kcq_disable(adev);
4949
4950         if (amdgpu_sriov_vf(adev)) {
4951                 pr_debug("For SRIOV client, shouldn't do anything.\n");
4952                 return 0;
4953         }
4954         adev->gfx.rlc.funcs->enter_safe_mode(adev);
4955         if (!gfx_v8_0_wait_for_idle(adev))
4956                 gfx_v8_0_cp_enable(adev, false);
4957         else
4958                 pr_err("cp is busy, skip halt cp\n");
4959         if (!gfx_v8_0_wait_for_rlc_idle(adev))
4960                 gfx_v8_0_rlc_stop(adev);
4961         else
4962                 pr_err("rlc is busy, skip halt rlc\n");
4963         adev->gfx.rlc.funcs->exit_safe_mode(adev);
4964         return 0;
4965 }
4966
4967 static int gfx_v8_0_suspend(void *handle)
4968 {
4969         return gfx_v8_0_hw_fini(handle);
4970 }
4971
4972 static int gfx_v8_0_resume(void *handle)
4973 {
4974         return gfx_v8_0_hw_init(handle);
4975 }
4976
4977 static bool gfx_v8_0_check_soft_reset(void *handle)
4978 {
4979         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4980         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4981         u32 tmp;
4982
4983         /* GRBM_STATUS */
4984         tmp = RREG32(mmGRBM_STATUS);
4985         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4986                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4987                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4988                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4989                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4990                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4991                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4992                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4993                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4994                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4995                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4996                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4997                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4998         }
4999
5000         /* GRBM_STATUS2 */
5001         tmp = RREG32(mmGRBM_STATUS2);
5002         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5003                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5004                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5005
5006         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5007             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5008             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5009                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5010                                                 SOFT_RESET_CPF, 1);
5011                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5012                                                 SOFT_RESET_CPC, 1);
5013                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5014                                                 SOFT_RESET_CPG, 1);
5015                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5016                                                 SOFT_RESET_GRBM, 1);
5017         }
5018
5019         /* SRBM_STATUS */
5020         tmp = RREG32(mmSRBM_STATUS);
5021         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5022                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5023                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5024         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5025                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5026                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5027
5028         if (grbm_soft_reset || srbm_soft_reset) {
5029                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5030                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5031                 return true;
5032         } else {
5033                 adev->gfx.grbm_soft_reset = 0;
5034                 adev->gfx.srbm_soft_reset = 0;
5035                 return false;
5036         }
5037 }
5038
5039 static int gfx_v8_0_pre_soft_reset(void *handle)
5040 {
5041         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5042         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5043
5044         if ((!adev->gfx.grbm_soft_reset) &&
5045             (!adev->gfx.srbm_soft_reset))
5046                 return 0;
5047
5048         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5049         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5050
5051         /* stop the rlc */
5052         gfx_v8_0_rlc_stop(adev);
5053
5054         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5055             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5056                 /* Disable GFX parsing/prefetching */
5057                 gfx_v8_0_cp_gfx_enable(adev, false);
5058
5059         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5060             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5061             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5062             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5063                 int i;
5064
5065                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5066                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5067
5068                         mutex_lock(&adev->srbm_mutex);
5069                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5070                         gfx_v8_0_deactivate_hqd(adev, 2);
5071                         vi_srbm_select(adev, 0, 0, 0, 0);
5072                         mutex_unlock(&adev->srbm_mutex);
5073                 }
5074                 /* Disable MEC parsing/prefetching */
5075                 gfx_v8_0_cp_compute_enable(adev, false);
5076         }
5077
5078        return 0;
5079 }
5080
5081 static int gfx_v8_0_soft_reset(void *handle)
5082 {
5083         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5084         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5085         u32 tmp;
5086
5087         if ((!adev->gfx.grbm_soft_reset) &&
5088             (!adev->gfx.srbm_soft_reset))
5089                 return 0;
5090
5091         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5092         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5093
5094         if (grbm_soft_reset || srbm_soft_reset) {
5095                 tmp = RREG32(mmGMCON_DEBUG);
5096                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5097                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5098                 WREG32(mmGMCON_DEBUG, tmp);
5099                 udelay(50);
5100         }
5101
5102         if (grbm_soft_reset) {
5103                 tmp = RREG32(mmGRBM_SOFT_RESET);
5104                 tmp |= grbm_soft_reset;
5105                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5106                 WREG32(mmGRBM_SOFT_RESET, tmp);
5107                 tmp = RREG32(mmGRBM_SOFT_RESET);
5108
5109                 udelay(50);
5110
5111                 tmp &= ~grbm_soft_reset;
5112                 WREG32(mmGRBM_SOFT_RESET, tmp);
5113                 tmp = RREG32(mmGRBM_SOFT_RESET);
5114         }
5115
5116         if (srbm_soft_reset) {
5117                 tmp = RREG32(mmSRBM_SOFT_RESET);
5118                 tmp |= srbm_soft_reset;
5119                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5120                 WREG32(mmSRBM_SOFT_RESET, tmp);
5121                 tmp = RREG32(mmSRBM_SOFT_RESET);
5122
5123                 udelay(50);
5124
5125                 tmp &= ~srbm_soft_reset;
5126                 WREG32(mmSRBM_SOFT_RESET, tmp);
5127                 tmp = RREG32(mmSRBM_SOFT_RESET);
5128         }
5129
5130         if (grbm_soft_reset || srbm_soft_reset) {
5131                 tmp = RREG32(mmGMCON_DEBUG);
5132                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5133                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5134                 WREG32(mmGMCON_DEBUG, tmp);
5135         }
5136
5137         /* Wait a little for things to settle down */
5138         udelay(50);
5139
5140         return 0;
5141 }
5142
5143 static int gfx_v8_0_post_soft_reset(void *handle)
5144 {
5145         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5146         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5147
5148         if ((!adev->gfx.grbm_soft_reset) &&
5149             (!adev->gfx.srbm_soft_reset))
5150                 return 0;
5151
5152         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5153         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5154
5155         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5156             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5157             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5158             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5159                 int i;
5160
5161                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5162                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5163
5164                         mutex_lock(&adev->srbm_mutex);
5165                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5166                         gfx_v8_0_deactivate_hqd(adev, 2);
5167                         vi_srbm_select(adev, 0, 0, 0, 0);
5168                         mutex_unlock(&adev->srbm_mutex);
5169                 }
5170                 gfx_v8_0_kiq_resume(adev);
5171                 gfx_v8_0_kcq_resume(adev);
5172         }
5173
5174         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5175             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5176                 gfx_v8_0_cp_gfx_resume(adev);
5177
5178         gfx_v8_0_rlc_start(adev);
5179
5180         return 0;
5181 }
5182
5183 /**
5184  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5185  *
5186  * @adev: amdgpu_device pointer
5187  *
5188  * Fetches a GPU clock counter snapshot.
5189  * Returns the 64 bit clock counter snapshot.
5190  */
5191 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5192 {
5193         uint64_t clock;
5194
5195         mutex_lock(&adev->gfx.gpu_clock_mutex);
5196         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5197         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5198                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5199         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5200         return clock;
5201 }
5202
5203 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5204                                           uint32_t vmid,
5205                                           uint32_t gds_base, uint32_t gds_size,
5206                                           uint32_t gws_base, uint32_t gws_size,
5207                                           uint32_t oa_base, uint32_t oa_size)
5208 {
5209         /* GDS Base */
5210         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5211         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5212                                 WRITE_DATA_DST_SEL(0)));
5213         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5214         amdgpu_ring_write(ring, 0);
5215         amdgpu_ring_write(ring, gds_base);
5216
5217         /* GDS Size */
5218         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5219         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5220                                 WRITE_DATA_DST_SEL(0)));
5221         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5222         amdgpu_ring_write(ring, 0);
5223         amdgpu_ring_write(ring, gds_size);
5224
5225         /* GWS */
5226         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5227         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5228                                 WRITE_DATA_DST_SEL(0)));
5229         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5230         amdgpu_ring_write(ring, 0);
5231         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5232
5233         /* OA */
5234         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5235         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5236                                 WRITE_DATA_DST_SEL(0)));
5237         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5238         amdgpu_ring_write(ring, 0);
5239         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5240 }
5241
5242 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5243 {
5244         WREG32(mmSQ_IND_INDEX,
5245                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5246                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5247                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5248                 (SQ_IND_INDEX__FORCE_READ_MASK));
5249         return RREG32(mmSQ_IND_DATA);
5250 }
5251
5252 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5253                            uint32_t wave, uint32_t thread,
5254                            uint32_t regno, uint32_t num, uint32_t *out)
5255 {
5256         WREG32(mmSQ_IND_INDEX,
5257                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5258                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5259                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5260                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5261                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5262                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5263         while (num--)
5264                 *(out++) = RREG32(mmSQ_IND_DATA);
5265 }
5266
5267 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5268 {
5269         /* type 0 wave data */
5270         dst[(*no_fields)++] = 0;
5271         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5272         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5273         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5274         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5275         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5276         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5277         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5278         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5279         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5280         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5281         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5282         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5283         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5284         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5285         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5286         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5287         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5288         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5289 }
5290
5291 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5292                                      uint32_t wave, uint32_t start,
5293                                      uint32_t size, uint32_t *dst)
5294 {
5295         wave_read_regs(
5296                 adev, simd, wave, 0,
5297                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5298 }
5299
5300
5301 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5302         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5303         .select_se_sh = &gfx_v8_0_select_se_sh,
5304         .read_wave_data = &gfx_v8_0_read_wave_data,
5305         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5306         .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5307 };
5308
5309 static int gfx_v8_0_early_init(void *handle)
5310 {
5311         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5312
5313         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5314         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5315         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5316         gfx_v8_0_set_ring_funcs(adev);
5317         gfx_v8_0_set_irq_funcs(adev);
5318         gfx_v8_0_set_gds_init(adev);
5319         gfx_v8_0_set_rlc_funcs(adev);
5320
5321         return 0;
5322 }
5323
5324 static int gfx_v8_0_late_init(void *handle)
5325 {
5326         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5327         int r;
5328
5329         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5330         if (r)
5331                 return r;
5332
5333         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5334         if (r)
5335                 return r;
5336
5337         /* requires IBs so do in late init after IB pool is initialized */
5338         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5339         if (r)
5340                 return r;
5341
5342         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5343         if (r) {
5344                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5345                 return r;
5346         }
5347
5348         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5349         if (r) {
5350                 DRM_ERROR(
5351                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5352                         r);
5353                 return r;
5354         }
5355
5356         return 0;
5357 }
5358
5359 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5360                                                        bool enable)
5361 {
5362         if (((adev->asic_type == CHIP_POLARIS11) ||
5363             (adev->asic_type == CHIP_POLARIS12) ||
5364             (adev->asic_type == CHIP_VEGAM)) &&
5365             adev->powerplay.pp_funcs->set_powergating_by_smu)
5366                 /* Send msg to SMU via Powerplay */
5367                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5368
5369         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5370 }
5371
5372 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5373                                                         bool enable)
5374 {
5375         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5376 }
5377
5378 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5379                 bool enable)
5380 {
5381         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5382 }
5383
5384 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5385                                           bool enable)
5386 {
5387         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5388 }
5389
5390 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5391                                                 bool enable)
5392 {
5393         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5394
5395         /* Read any GFX register to wake up GFX. */
5396         if (!enable)
5397                 RREG32(mmDB_RENDER_CONTROL);
5398 }
5399
5400 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5401                                           bool enable)
5402 {
5403         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5404                 cz_enable_gfx_cg_power_gating(adev, true);
5405                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5406                         cz_enable_gfx_pipeline_power_gating(adev, true);
5407         } else {
5408                 cz_enable_gfx_cg_power_gating(adev, false);
5409                 cz_enable_gfx_pipeline_power_gating(adev, false);
5410         }
5411 }
5412
5413 static int gfx_v8_0_set_powergating_state(void *handle,
5414                                           enum amd_powergating_state state)
5415 {
5416         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5417         bool enable = (state == AMD_PG_STATE_GATE);
5418
5419         if (amdgpu_sriov_vf(adev))
5420                 return 0;
5421
5422         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5423                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5424                                 AMD_PG_SUPPORT_CP |
5425                                 AMD_PG_SUPPORT_GFX_DMG))
5426                 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5427         switch (adev->asic_type) {
5428         case CHIP_CARRIZO:
5429         case CHIP_STONEY:
5430
5431                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5432                         cz_enable_sck_slow_down_on_power_up(adev, true);
5433                         cz_enable_sck_slow_down_on_power_down(adev, true);
5434                 } else {
5435                         cz_enable_sck_slow_down_on_power_up(adev, false);
5436                         cz_enable_sck_slow_down_on_power_down(adev, false);
5437                 }
5438                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5439                         cz_enable_cp_power_gating(adev, true);
5440                 else
5441                         cz_enable_cp_power_gating(adev, false);
5442
5443                 cz_update_gfx_cg_power_gating(adev, enable);
5444
5445                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5446                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5447                 else
5448                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5449
5450                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5451                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5452                 else
5453                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5454                 break;
5455         case CHIP_POLARIS11:
5456         case CHIP_POLARIS12:
5457         case CHIP_VEGAM:
5458                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5459                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5460                 else
5461                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5462
5463                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5464                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5465                 else
5466                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5467
5468                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5469                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5470                 else
5471                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5472                 break;
5473         default:
5474                 break;
5475         }
5476         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5477                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5478                                 AMD_PG_SUPPORT_CP |
5479                                 AMD_PG_SUPPORT_GFX_DMG))
5480                 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5481         return 0;
5482 }
5483
/* Report which GFX clockgating features are currently active in hardware
 * by inspecting the relevant gating-control registers; each active feature
 * ORs its AMD_CG_SUPPORT_GFX_* bit into @flags.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* VFs do not own clockgating control; report nothing active. */
	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG: active when the CPF override is cleared */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS (same register as CGCG) */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS: active when the SM override is cleared */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS: RLC memory light sleep implies MGLS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS: CP memory light sleep implies MGLS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5525
/* Broadcast a BPM serdes command to every CU/non-CU master across all
 * SE/SH instances.
 * @reg_addr: BPM register to address (e.g. BPM_REG_MGCG_OVERRIDE)
 * @cmd:      command value placed in the BPM_DATA field
 *            (e.g. SET_BPM_SERDES_CMD / CLE_BPM_SERDES_CMD)
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* Select all SEs/SHs so the write is broadcast. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	/* Stoney leaves the BPM_DATA and REG_ADDR fields untouched here;
	 * other parts clear them too before re-programming below.
	 */
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* Encode the command, target register and a broadcast BPM address. */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5566
5567 #define MSG_ENTER_RLC_SAFE_MODE     1
5568 #define MSG_EXIT_RLC_SAFE_MODE      0
5569 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5570 #define RLC_GPR_REG2__REQ__SHIFT 0
5571 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5572 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5573
/* Ask the RLC firmware to enter safe mode so clock/power gating state can
 * be reprogrammed.  No-op when the RLC is not running or when neither CGCG
 * nor MGCG is supported.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* Nothing to do if the RLC F32 core is not enabled. */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CMD=1 with MESSAGE=1 requests safe-mode entry. */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* Wait until GFX clocks and power are both reported on. */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* Wait for the RLC to ack the request (CMD self-clears). */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5607
/* Counterpart to iceland_enter_rlc_safe_mode(): request safe-mode exit
 * (CMD=1 with MESSAGE=0) and wait for the RLC to acknowledge.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	/* Nothing to do if the RLC F32 core is not enabled. */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD=1 with MESSAGE=0 requests safe-mode exit. */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* Wait for the RLC to ack the request (CMD self-clears). */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5632
/* RLC safe-mode entry/exit hooks used by the gfx v8 clock/power gating
 * code via adev->gfx.rlc.funcs.
 */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5637
/* Enable or disable medium grain clock gating (MGCG), medium grain light
 * sleep (MGLS) and CGTS tree-shade gating.  The register sequence below is
 * order-sensitive and runs entirely under RLC safe mode.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit set here. */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		/* only write back if something changed */
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* LS override is lifted only when CGTS_LS and MGLS
			 * are both supported */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5741
/* Enable or disable coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS).  The register sequence below is order-sensitive and
 * runs entirely under RLC safe mode.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - lift the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls and lift its override */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG: set both CGCG and CGLS overrides */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5834 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5835                                             bool enable)
5836 {
5837         if (enable) {
5838                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5839                  * ===  MGCG + MGLS + TS(CG/LS) ===
5840                  */
5841                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5842                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5843         } else {
5844                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5845                  * ===  CGCG + CGLS ===
5846                  */
5847                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5848                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5849         }
5850         return 0;
5851 }
5852
5853 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5854                                           enum amd_clockgating_state state)
5855 {
5856         uint32_t msg_id, pp_state = 0;
5857         uint32_t pp_support_state = 0;
5858
5859         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5860                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5861                         pp_support_state = PP_STATE_SUPPORT_LS;
5862                         pp_state = PP_STATE_LS;
5863                 }
5864                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5865                         pp_support_state |= PP_STATE_SUPPORT_CG;
5866                         pp_state |= PP_STATE_CG;
5867                 }
5868                 if (state == AMD_CG_STATE_UNGATE)
5869                         pp_state = 0;
5870
5871                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5872                                 PP_BLOCK_GFX_CG,
5873                                 pp_support_state,
5874                                 pp_state);
5875                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5876                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5877         }
5878
5879         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5880                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5881                         pp_support_state = PP_STATE_SUPPORT_LS;
5882                         pp_state = PP_STATE_LS;
5883                 }
5884
5885                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5886                         pp_support_state |= PP_STATE_SUPPORT_CG;
5887                         pp_state |= PP_STATE_CG;
5888                 }
5889
5890                 if (state == AMD_CG_STATE_UNGATE)
5891                         pp_state = 0;
5892
5893                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5894                                 PP_BLOCK_GFX_MG,
5895                                 pp_support_state,
5896                                 pp_state);
5897                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5898                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5899         }
5900
5901         return 0;
5902 }
5903
5904 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5905                                           enum amd_clockgating_state state)
5906 {
5907
5908         uint32_t msg_id, pp_state = 0;
5909         uint32_t pp_support_state = 0;
5910
5911         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5912                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5913                         pp_support_state = PP_STATE_SUPPORT_LS;
5914                         pp_state = PP_STATE_LS;
5915                 }
5916                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5917                         pp_support_state |= PP_STATE_SUPPORT_CG;
5918                         pp_state |= PP_STATE_CG;
5919                 }
5920                 if (state == AMD_CG_STATE_UNGATE)
5921                         pp_state = 0;
5922
5923                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5924                                 PP_BLOCK_GFX_CG,
5925                                 pp_support_state,
5926                                 pp_state);
5927                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5928                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5929         }
5930
5931         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5932                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5933                         pp_support_state = PP_STATE_SUPPORT_LS;
5934                         pp_state = PP_STATE_LS;
5935                 }
5936                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5937                         pp_support_state |= PP_STATE_SUPPORT_CG;
5938                         pp_state |= PP_STATE_CG;
5939                 }
5940                 if (state == AMD_CG_STATE_UNGATE)
5941                         pp_state = 0;
5942
5943                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5944                                 PP_BLOCK_GFX_3D,
5945                                 pp_support_state,
5946                                 pp_state);
5947                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5948                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5949         }
5950
5951         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5952                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5953                         pp_support_state = PP_STATE_SUPPORT_LS;
5954                         pp_state = PP_STATE_LS;
5955                 }
5956
5957                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5958                         pp_support_state |= PP_STATE_SUPPORT_CG;
5959                         pp_state |= PP_STATE_CG;
5960                 }
5961
5962                 if (state == AMD_CG_STATE_UNGATE)
5963                         pp_state = 0;
5964
5965                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5966                                 PP_BLOCK_GFX_MG,
5967                                 pp_support_state,
5968                                 pp_state);
5969                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5970                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5971         }
5972
5973         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5974                 pp_support_state = PP_STATE_SUPPORT_LS;
5975
5976                 if (state == AMD_CG_STATE_UNGATE)
5977                         pp_state = 0;
5978                 else
5979                         pp_state = PP_STATE_LS;
5980
5981                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5982                                 PP_BLOCK_GFX_RLC,
5983                                 pp_support_state,
5984                                 pp_state);
5985                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5986                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5987         }
5988
5989         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5990                 pp_support_state = PP_STATE_SUPPORT_LS;
5991
5992                 if (state == AMD_CG_STATE_UNGATE)
5993                         pp_state = 0;
5994                 else
5995                         pp_state = PP_STATE_LS;
5996                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5997                         PP_BLOCK_GFX_CP,
5998                         pp_support_state,
5999                         pp_state);
6000                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6001                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6002         }
6003
6004         return 0;
6005 }
6006
6007 static int gfx_v8_0_set_clockgating_state(void *handle,
6008                                           enum amd_clockgating_state state)
6009 {
6010         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6011
6012         if (amdgpu_sriov_vf(adev))
6013                 return 0;
6014
6015         switch (adev->asic_type) {
6016         case CHIP_FIJI:
6017         case CHIP_CARRIZO:
6018         case CHIP_STONEY:
6019                 gfx_v8_0_update_gfx_clock_gating(adev,
6020                                                  state == AMD_CG_STATE_GATE);
6021                 break;
6022         case CHIP_TONGA:
6023                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6024                 break;
6025         case CHIP_POLARIS10:
6026         case CHIP_POLARIS11:
6027         case CHIP_POLARIS12:
6028         case CHIP_VEGAM:
6029                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6030                 break;
6031         default:
6032                 break;
6033         }
6034         return 0;
6035 }
6036
6037 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6038 {
6039         return ring->adev->wb.wb[ring->rptr_offs];
6040 }
6041
6042 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6043 {
6044         struct amdgpu_device *adev = ring->adev;
6045
6046         if (ring->use_doorbell)
6047                 /* XXX check if swapping is necessary on BE */
6048                 return ring->adev->wb.wb[ring->wptr_offs];
6049         else
6050                 return RREG32(mmCP_RB0_WPTR);
6051 }
6052
6053 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6054 {
6055         struct amdgpu_device *adev = ring->adev;
6056
6057         if (ring->use_doorbell) {
6058                 /* XXX check if swapping is necessary on BE */
6059                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6060                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6061         } else {
6062                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6063                 (void)RREG32(mmCP_RB0_WPTR);
6064         }
6065 }
6066
/* Emit a WAIT_REG_MEM packet that requests an HDP flush and waits for its
 * completion bit, picking the per-engine done bit and wait engine based on
 * the ring type.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		/* compute/KIQ: done bit depends on MEC (me) and pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6099
/* Emit a VGT flush: a VS partial flush followed by a VGT_FLUSH event. */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6110
/* Emit an indirect buffer on the gfx ring.  CE IBs use the CONST packet
 * variant; the control dword carries the IB length and VMID.
 * @ctx_switch is unused here.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
					struct amdgpu_job *job,
					struct amdgpu_ib *ib,
					bool ctx_switch)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	/* preemptible SR-IOV IBs are marked and preceded by DE metadata */
	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6142
/* Emit an indirect buffer on a compute ring.  The control dword carries
 * the IB length, VMID and the VALID bit.  @ctx_switch is unused here.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  bool ctx_switch)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6160
/* Emit a gfx fence: an EVENT_WRITE_EOP that flushes/writes back the TC
 * caches, writes @seq to @addr (32- or 64-bit per flags) and optionally
 * raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6181
/**
 * gfx_v8_0_ring_emit_pipeline_sync - wait for previous work on this ring
 *
 * @ring: ring to emit the sync on
 *
 * Emits a WAIT_REG_MEM packet that polls the ring's fence writeback
 * location until it equals the last emitted sync sequence number, i.e.
 * until all previously scheduled work on this ring has signalled.
 * On gfx rings the wait runs on the PFP, otherwise on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq); /* reference value */
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6198
/**
 * gfx_v8_0_ring_emit_vm_flush - flush the TLB for a VMID from the ring
 *
 * @ring: ring to emit the flush on
 * @vmid: VM ID whose page table base is switched
 * @pd_addr: new page directory base address
 *
 * Delegates the actual page-table update and invalidate request to the
 * GMC helper, then emits a serializing WAIT_REG_MEM read of
 * mmVM_INVALIDATE_REQUEST so the invalidate has completed before any
 * following commands run.  On gfx rings a PFP_SYNC_ME is appended so
 * the prefetcher does not run ahead with stale translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6224
/* Return the compute ring's write pointer from its writeback slot. */
static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}
6229
/**
 * gfx_v8_0_ring_set_wptr_compute - commit the compute ring write pointer
 *
 * @ring: compute ring
 *
 * Mirrors the new wptr into the writeback slot first, then rings the
 * doorbell so the CP fetches the newly written commands.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
6238
/**
 * gfx_v8_0_ring_set_pipe_percent - set the SPI work-class percentage
 *
 * @ring: ring identifying the pipe to adjust
 * @acquire: true for the maximum percentage, false for the minimum (0x1)
 *
 * Read-modify-writes the SPI_WCL_PIPE_PERCENT_GFX register that belongs
 * to @ring's pipe.  Used by the pipe reservation code below to throttle
 * pipes that hold no reservation.
 */
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}
6257
/**
 * gfx_v8_0_pipe_reserve_resources - acquire/release a pipe reservation
 *
 * @adev: amdgpu device
 * @ring: ring acquiring or releasing its pipe reservation
 * @acquire: true to reserve the pipe, false to release it
 *
 * Maintains pipe_reserve_bitmap under pipe_reserve_mutex.  While at
 * least one pipe holds a reservation, every pipe without one has its
 * SPI work-class percentage lowered; once the last reservation is
 * dropped all gfx and compute rings are restored to full percentage.
 */
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
6307
/**
 * gfx_v8_0_hqd_set_priority - program HQD priority registers for a queue
 *
 * @adev: amdgpu device
 * @ring: compute ring whose hardware queue descriptor is reprogrammed
 * @acquire: true to raise pipe/queue priority, false to reset to 0
 *
 * Selects @ring's me/pipe/queue via SRBM and writes the pipe and queue
 * priority registers; srbm_mutex is held around the banked access and
 * the selection is restored to 0/0/0/0 afterwards.
 */
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
6324 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6325                                                enum drm_sched_priority priority)
6326 {
6327         struct amdgpu_device *adev = ring->adev;
6328         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6329
6330         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6331                 return;
6332
6333         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6334         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6335 }
6336
/**
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 *
 * @ring: compute ring to emit the fence on
 * @addr: GPU address the fence value is written to
 * @seq: sequence number of the fence
 * @flags: AMDGPU_FENCE_FLAG_* controlling write width and interrupt
 *
 * Emits a RELEASE_MEM packet that flushes the TC/TCL1 caches and writes
 * @seq (32 or 64 bit) to @addr once prior work has drained; optionally
 * raises an interrupt when AMDGPU_FENCE_FLAG_INT is set.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6357
/**
 * gfx_v8_0_ring_emit_fence_kiq - emit a fence on the KIQ ring
 *
 * @ring: KIQ ring
 * @addr: GPU address the 32-bit fence value is written to
 * @seq: sequence number of the fence
 * @flags: AMDGPU_FENCE_FLAG_*; 64-bit fences are not supported here
 *
 * Writes the fence value with a WRITE_DATA packet and, when
 * AMDGPU_FENCE_FLAG_INT is set, writes CPC_INT_STATUS to trigger the
 * interrupt (src_id 178).
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6382
/* Emit a single SWITCH_BUFFER packet on the gfx ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6388
/**
 * gfx_v8_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet
 *
 * @ring: gfx ring
 * @flags: AMDGPU_HAVE_CTX_SWITCH / AMDGPU_PREAMBLE_IB_PRESENT* flags
 *
 * Builds the load-enable dword of CONTEXT_CONTROL depending on whether
 * a context switch happens and whether a preamble IB is present.  Under
 * SR-IOV the CE metadata is emitted first.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6421
/**
 * gfx_v8_0_ring_emit_init_cond_exec - start a conditional execution span
 *
 * @ring: gfx ring
 *
 * Emits a COND_EXEC packet gated on *cond_exe_gpu_addr; the count of
 * dwords to skip is written as a 0x55aa55aa placeholder and later fixed
 * up by gfx_v8_0_ring_emit_patch_cond_exec().
 *
 * Returns the ring-buffer offset (masked wptr) of the placeholder dword.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6434
/**
 * gfx_v8_0_ring_emit_patch_cond_exec - patch a COND_EXEC size placeholder
 *
 * @ring: gfx ring the COND_EXEC was emitted on
 * @offset: ring offset returned by gfx_v8_0_ring_emit_init_cond_exec()
 *
 * Replaces the 0x55aa55aa placeholder at @offset with the number of
 * dwords emitted since it, accounting for the case where the write
 * pointer has wrapped around the end of the ring buffer.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else	/* wptr wrapped: add the distance to the end of the ring */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6448
/**
 * gfx_v8_0_ring_emit_rreg - read a register through the ring
 *
 * @ring: ring to emit on
 * @reg: register offset to read
 *
 * Emits a COPY_DATA packet that copies @reg into the writeback slot at
 * adev->virt.reg_val_offs, from which the value can be read once the
 * packet has executed.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |     /* src: register*/
				(5 << 8) |      /* dst: memory */
				(1 << 20));     /* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6464
6465 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6466                                   uint32_t val)
6467 {
6468         uint32_t cmd;
6469
6470         switch (ring->funcs->type) {
6471         case AMDGPU_RING_TYPE_GFX:
6472                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6473                 break;
6474         case AMDGPU_RING_TYPE_KIQ:
6475                 cmd = 1 << 16; /* no inc addr */
6476                 break;
6477         default:
6478                 cmd = WR_CONFIRM;
6479                 break;
6480         }
6481
6482         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6483         amdgpu_ring_write(ring, cmd);
6484         amdgpu_ring_write(ring, reg);
6485         amdgpu_ring_write(ring, 0);
6486         amdgpu_ring_write(ring, val);
6487 }
6488
6489 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6490 {
6491         struct amdgpu_device *adev = ring->adev;
6492         uint32_t value = 0;
6493
6494         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6495         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6496         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6497         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6498         WREG32(mmSQ_CMD, value);
6499 }
6500
/* En/disable the gfx ring's EOP timestamp interrupt in CP_INT_CNTL_RING0. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6507
6508 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6509                                                      int me, int pipe,
6510                                                      enum amdgpu_interrupt_state state)
6511 {
6512         u32 mec_int_cntl, mec_int_cntl_reg;
6513
6514         /*
6515          * amdgpu controls only the first MEC. That's why this function only
6516          * handles the setting of interrupts for this specific MEC. All other
6517          * pipes' interrupts are set by amdkfd.
6518          */
6519
6520         if (me == 1) {
6521                 switch (pipe) {
6522                 case 0:
6523                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6524                         break;
6525                 case 1:
6526                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6527                         break;
6528                 case 2:
6529                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6530                         break;
6531                 case 3:
6532                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6533                         break;
6534                 default:
6535                         DRM_DEBUG("invalid pipe %d\n", pipe);
6536                         return;
6537                 }
6538         } else {
6539                 DRM_DEBUG("invalid me %d\n", me);
6540                 return;
6541         }
6542
6543         switch (state) {
6544         case AMDGPU_IRQ_STATE_DISABLE:
6545                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6546                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6547                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6548                 break;
6549         case AMDGPU_IRQ_STATE_ENABLE:
6550                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6551                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6552                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6553                 break;
6554         default:
6555                 break;
6556         }
6557 }
6558
/* En/disable the privileged-register-access fault interrupt. Always 0. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6569
/* En/disable the privileged-instruction fault interrupt. Always 0. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6580
/**
 * gfx_v8_0_set_eop_interrupt_state - route an EOP irq state change
 *
 * @adev: amdgpu device
 * @src: interrupt source (unused)
 * @type: AMDGPU_CP_IRQ_* selecting the gfx ring or a MEC pipe
 * @state: enable or disable
 *
 * Dispatches to the gfx helper or to the per-MEC-pipe helper; unknown
 * types are silently ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6619
6620 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6621                                          struct amdgpu_irq_src *source,
6622                                          unsigned int type,
6623                                          enum amdgpu_interrupt_state state)
6624 {
6625         int enable_flag;
6626
6627         switch (state) {
6628         case AMDGPU_IRQ_STATE_DISABLE:
6629                 enable_flag = 0;
6630                 break;
6631
6632         case AMDGPU_IRQ_STATE_ENABLE:
6633                 enable_flag = 1;
6634                 break;
6635
6636         default:
6637                 return -EINVAL;
6638         }
6639
6640         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6641         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6642         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6643         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6644         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6645         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6646                      enable_flag);
6647         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6648                      enable_flag);
6649         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6650                      enable_flag);
6651         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6652                      enable_flag);
6653         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6654                      enable_flag);
6655         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6656                      enable_flag);
6657         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6658                      enable_flag);
6659         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6660                      enable_flag);
6661
6662         return 0;
6663 }
6664
6665 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6666                                      struct amdgpu_irq_src *source,
6667                                      unsigned int type,
6668                                      enum amdgpu_interrupt_state state)
6669 {
6670         int enable_flag;
6671
6672         switch (state) {
6673         case AMDGPU_IRQ_STATE_DISABLE:
6674                 enable_flag = 1;
6675                 break;
6676
6677         case AMDGPU_IRQ_STATE_ENABLE:
6678                 enable_flag = 0;
6679                 break;
6680
6681         default:
6682                 return -EINVAL;
6683         }
6684
6685         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6686                      enable_flag);
6687
6688         return 0;
6689 }
6690
6691 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6692                             struct amdgpu_irq_src *source,
6693                             struct amdgpu_iv_entry *entry)
6694 {
6695         int i;
6696         u8 me_id, pipe_id, queue_id;
6697         struct amdgpu_ring *ring;
6698
6699         DRM_DEBUG("IH: CP EOP\n");
6700         me_id = (entry->ring_id & 0x0c) >> 2;
6701         pipe_id = (entry->ring_id & 0x03) >> 0;
6702         queue_id = (entry->ring_id & 0x70) >> 4;
6703
6704         switch (me_id) {
6705         case 0:
6706                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6707                 break;
6708         case 1:
6709         case 2:
6710                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6711                         ring = &adev->gfx.compute_ring[i];
6712                         /* Per-queue interrupt is supported for MEC starting from VI.
6713                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6714                           */
6715                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6716                                 amdgpu_fence_process(ring);
6717                 }
6718                 break;
6719         }
6720         return 0;
6721 }
6722
/**
 * gfx_v8_0_fault - signal a scheduler fault on the ring that faulted
 *
 * @adev: amdgpu device
 * @entry: IH entry carrying the encoded me/pipe/queue in ring_id
 *
 * Decodes me/pipe/queue from the IH ring_id and reports a fault to the
 * DRM scheduler of the matching gfx or compute ring so the offending
 * job can be handled.
 */
static void gfx_v8_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		/* me 0 is the gfx engine; it only owns the single gfx ring */
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}
6749
/* Handle an illegal-register-access interrupt: log it and fault the ring. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
6758
/* Handle an illegal-instruction interrupt: log it and fault the ring. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
6767
/* Handle a CP EDC/ECC error interrupt; currently only logs it. */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* terminate the message with '\n' so it is not merged with the
	 * next printk (the original string ended in a bare '.')
	 */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6775
6776 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6777 {
6778         u32 enc, se_id, sh_id, cu_id;
6779         char type[20];
6780         int sq_edc_source = -1;
6781
6782         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6783         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6784
6785         switch (enc) {
6786                 case 0:
6787                         DRM_INFO("SQ general purpose intr detected:"
6788                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6789                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6790                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6791                                         "wlt %d, thread_trace %d.\n",
6792                                         se_id,
6793                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6794                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6795                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6796                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6797                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6798                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6799                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6800                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6801                                         );
6802                         break;
6803                 case 1:
6804                 case 2:
6805
6806                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6807                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6808
6809                         /*
6810                          * This function can be called either directly from ISR
6811                          * or from BH in which case we can access SQ_EDC_INFO
6812                          * instance
6813                          */
6814                         if (in_task()) {
6815                                 mutex_lock(&adev->grbm_idx_mutex);
6816                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6817
6818                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6819
6820                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6821                                 mutex_unlock(&adev->grbm_idx_mutex);
6822                         }
6823
6824                         if (enc == 1)
6825                                 sprintf(type, "instruction intr");
6826                         else
6827                                 sprintf(type, "EDC/ECC error");
6828
6829                         DRM_INFO(
6830                                 "SQ %s detected: "
6831                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6832                                         "trap %s, sq_ed_info.source %s.\n",
6833                                         type, se_id, sh_id, cu_id,
6834                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6835                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6836                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6837                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6838                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6839                                 );
6840                         break;
6841                 default:
6842                         DRM_ERROR("SQ invalid encoding type\n.");
6843         }
6844 }
6845
6846 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6847 {
6848
6849         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6850         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6851
6852         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6853 }
6854
/**
 * gfx_v8_0_sq_irq - top half for SQ interrupts
 *
 * @adev: amdgpu device
 * @source: interrupt source (unused)
 * @entry: IH entry; src_data[0] carries the SQ interrupt word
 *
 * Defers parsing to the work item so SQ_EDC_INFO can be read in task
 * context; falls back to parsing directly (without EDC info) when the
 * previous work item has not run yet.  Always returns 0.
 */
static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
			   struct amdgpu_irq_src *source,
			   struct amdgpu_iv_entry *entry)
{
	unsigned ih_data = entry->src_data[0];

	/*
	 * Try to submit work so SQ_EDC_INFO can be accessed from
	 * BH. If previous work submission hasn't finished yet
	 * just print whatever info is possible directly from the ISR.
	 */
	if (work_pending(&adev->gfx.sq_work.work)) {
		gfx_v8_0_parse_sq_irq(adev, ih_data);
	} else {
		adev->gfx.sq_work.ih_data = ih_data;
		schedule_work(&adev->gfx.sq_work.work);
	}

	return 0;
}
6875
/*
 * IP-block level entry points for GFX v8.0: lifecycle (init/fini,
 * suspend/resume), idle/reset handling and clock/power gating control.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6896
/*
 * Ring callbacks for the GFX (graphics) ring: rptr/wptr access, packet
 * emission and ring/IB sanity tests.  emit_frame_size is the worst-case
 * dword budget reserved per submitted frame; each contribution is itemized
 * in the comments below.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
};
6941
/*
 * Ring callbacks for the compute (MEC) rings.  Shares most packet emitters
 * with the GFX ring but uses compute-specific rptr/wptr, IB and fence
 * handlers, and supports runtime ring priority changes.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6971
/*
 * Ring callbacks for the KIQ (kernel interface queue) ring.  Note the
 * deliberately reduced set compared to the compute ring: no .emit_ib,
 * no .test_ib and no VM flush/GDS emitters are wired up here — only
 * fence emission, register read/write emission and the basic ring test.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6995
6996 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6997 {
6998         int i;
6999
7000         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7001
7002         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7003                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7004
7005         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7006                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7007 }
7008
/* End-of-pipe interrupt: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
7013
/* Privileged register access fault interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
7018
/* Privileged instruction fault interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
7023
/* CP ECC error interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};
7028
/* SQ (shader sequencer) interrupt; handler may defer to a workqueue. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};
7033
/* Register all GFX interrupt sources with their type counts and handlers. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* EOP has one interrupt type per CP ring. */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
7051
/* Hook up the RLC callback table (shared with Iceland/Topaz). */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7056
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	/* Partition sizes depend on whether the ASIC exposes 64KB of GDS. */
	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
7084
7085 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7086                                                  u32 bitmap)
7087 {
7088         u32 data;
7089
7090         if (!bitmap)
7091                 return;
7092
7093         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7094         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7095
7096         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7097 }
7098
7099 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7100 {
7101         u32 data, mask;
7102
7103         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7104                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7105
7106         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7107
7108         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7109 }
7110
/*
 * Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, total active CU
 * count, the always-on (AO) CU mask and fixed per-CU capability numbers.
 * Must iterate under grbm_idx_mutex because gfx_v8_0_select_se_sh()
 * programs global GRBM index state.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the number of always-on CUs per SH at 2. */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	/* Parse the amdgpu.disable_cu module parameter into a 4x2 table. */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* Point GRBM at this SE/SH before touching CU regs. */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* disable_masks only covers 4 SEs x 2 SHs. */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* Count active CUs; the first ao_cu_num become AO. */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* ao_cu_mask packs 8 bits per SH, 2 SEs x 2 SHs max. */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* Restore broadcast mode so later register access hits all SE/SH. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7166
/* GFX 8.0 IP block descriptor (exported for SoC setup code). */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7175
/* GFX 8.1 IP block descriptor — same callbacks as 8.0, different minor. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7184
/*
 * Emit a WRITE_DATA packet that stores zeroed CE metadata at the
 * ce_payload slot of the per-ring CSA (clear state area).  The payload
 * layout (and hence the packet size) differs depending on whether the
 * virtualization stack supports chained IBs.
 */
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	/* cnt_ce = payload dwords + 4 header dwords - 2 (PACKET3 count bias) */
	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	/* engine_sel=2 selects the CE; dst_sel=8 writes via GPU address. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
7213
/*
 * Emit a WRITE_DATA packet that stores DE metadata at the de_payload slot
 * of the per-ring CSA.  Unlike the CE variant, the payload carries the GDS
 * backup address (CSA base + 4096); the layout again depends on chained-IB
 * support.
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	/* GDS backup area lives one page past the CSA base. */
	gds_addr = csa_addr + 4096;
	/* cnt_de = payload dwords + 4 header dwords - 2 (PACKET3 count bias) */
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	/* engine_sel=1 selects the DE; dst_sel=8 writes via GPU address. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}
This page took 0.472583 seconds and 4 git commands to generate.