/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

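/*
 * Helpers that place tiling parameters into the GB_TILE_MODE0 and
 * GB_MACROTILE_MODE0 register fields via the generated __SHIFT definitions.
 */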
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

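/*
 * Field masks for RLC_CGTT_MGCG_OVERRIDE, defined locally for use by the
 * clock-gating code.
 */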
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

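/*
 * Firmware images required per ASIC.  MODULE_FIRMWARE() records the file
 * names in the module info so userspace tooling (e.g. initramfs generators)
 * can bundle them with the driver.
 */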
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");

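/* GDS base/size, GWS and OA register offsets, indexed by VMID */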
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

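/*
 * "Golden" register tables are {offset, bitmask, value} triples consumed by
 * amdgpu_device_program_register_sequence(), which rewrites the masked bits
 * of each register with the given value at init time.
 */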
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

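/*
 * Human-readable decode of the SQ_EDC_INFO SOURCE field, used when reporting
 * SQ ECC/EDC interrupts.
 */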
static const char * const sq_edc_source_names[] = {
        "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
        "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
        "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
        "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
        "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
        "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
        "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

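/* Apply the per-ASIC "golden" register settings defined above. */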
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_mgcg_cgcg_init,
                                                        ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_iceland_a11,
                                                        ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_golden_common_all,
                                                        ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_mgcg_cgcg_init,
                                                        ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_fiji_a10,
                                                        ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_golden_common_all,
                                                        ARRAY_SIZE(fiji_golden_common_all));
                break;
        case CHIP_TONGA:
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_mgcg_cgcg_init,
                                                        ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_tonga_a11,
                                                        ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_golden_common_all,
                                                        ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_VEGAM:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_vegam_a11,
                                                        ARRAY_SIZE(golden_settings_vegam_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        vegam_golden_common_all,
                                                        ARRAY_SIZE(vegam_golden_common_all));
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris11_a11,
                                                        ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris11_golden_common_all,
                                                        ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris10_a11,
                                                        ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris10_golden_common_all,
                                                        ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
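                /*
                 * Board-specific workaround: a few Polaris10 boards, matched
                 * by PCI subsystem IDs, need extra I2C writes at init time,
                 * presumably a vendor-specific tuning fix.
                 */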
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_device_program_register_sequence(adev,
                                                        cz_mgcg_cgcg_init,
                                                        ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_settings_a11,
                                                        ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_common_all,
                                                        ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_mgcg_cgcg_init,
                                                        ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_settings_a11,
                                                        ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_common_all,
                                                        ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

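/*
 * Basic ring test: write 0xCAFEDEAD to a scratch register, emit a
 * SET_UCONFIG_REG packet that stores 0xDEADBEEF there, then poll until the
 * CP has executed the packet or the timeout expires.
 */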
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
                          ring->idx, r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < adev->usec_timeout) {
                DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
                         ring->idx, i);
        } else {
                DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
                          ring->idx, scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

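/*
 * IB test: submit an indirect buffer containing a single WRITE_DATA packet
 * that stores 0xDEADBEEF into a writeback slot, wait on the fence, then
 * verify the memory was updated.
 */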
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        unsigned int index;
        uint64_t gpu_addr;
        uint32_t tmp;
        long r;

        r = amdgpu_device_wb_get(adev, &index);
        if (r) {
                dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
                return r;
        }

        gpu_addr = adev->wb.gpu_addr + (index * 4);
        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 16, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
                goto err1;
        }
        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
        ib.ptr[2] = lower_32_bits(gpu_addr);
        ib.ptr[3] = upper_32_bits(gpu_addr);
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                DRM_ERROR("amdgpu: IB test timed out.\n");
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
                goto err2;
        }

        tmp = adev->wb.wb[index];
        if (tmp == 0xDEADBEEF) {
                DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
                r = 0;
        } else {
                DRM_ERROR("ib test on ring %d failed\n", ring->idx);
                r = -EINVAL;
        }

err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_device_wb_free(adev, index);
        return r;
}

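/* Drop all CP/RLC firmware references taken in gfx_v8_0_init_microcode(). */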
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ))
                release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

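/*
 * Fetch and validate the CP (PFP/ME/CE/MEC), RLC and, where present, MEC2
 * firmware images.  Polaris parts first try the updated "_2" images and
 * fall back to the original file names if those are absent.
 */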
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
        const char *chip_name;
        char fw_name[30];
        int err;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
        const struct gfx_firmware_header_v1_0 *cp_hdr;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        unsigned int *tmp = NULL, i;

        DRM_DEBUG("\n");

        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                chip_name = "topaz";
                break;
        case CHIP_TONGA:
                chip_name = "tonga";
                break;
        case CHIP_CARRIZO:
                chip_name = "carrizo";
                break;
        case CHIP_FIJI:
                chip_name = "fiji";
                break;
        case CHIP_STONEY:
                chip_name = "stoney";
                break;
        case CHIP_POLARIS10:
                chip_name = "polaris10";
                break;
        case CHIP_POLARIS11:
                chip_name = "polaris11";
                break;
        case CHIP_POLARIS12:
                chip_name = "polaris12";
                break;
        case CHIP_VEGAM:
                chip_name = "vegam";
                break;
        default:
                BUG();
        }

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
                err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
                if (err == -ENOENT) {
                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
                        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
                }
        } else {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
                err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        }
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
                err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
                if (err == -ENOENT) {
                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
                        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
                }
        } else {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
                err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        }
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.me_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
                err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
                if (err == -ENOENT) {
                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
                        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
                }
        } else {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
                err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        }
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        /*
         * Support for MCBP/Virtualization in combination with chained IBs was
         * formally released with feature version 46.
         */
        if (adev->gfx.ce_feature_version >= 46 &&
            adev->gfx.pfp_feature_version >= 46) {
                adev->virt.chained_ib_support = true;
                DRM_INFO("Chained IB support enabled!\n");
        } else {
                adev->virt.chained_ib_support = false;
        }

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

        adev->gfx.rlc.save_and_restore_offset =
                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
        adev->gfx.rlc.clear_state_descriptor_offset =
                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
        adev->gfx.rlc.avail_scratch_ram_locations =
                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
        adev->gfx.rlc.reg_restore_list_size =
                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
        adev->gfx.rlc.reg_list_format_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_start);
        adev->gfx.rlc.reg_list_format_separate_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
        adev->gfx.rlc.starting_offsets_start =
                        le32_to_cpu(rlc_hdr->starting_offsets_start);
        adev->gfx.rlc.reg_list_format_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
        adev->gfx.rlc.reg_list_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);

        adev->gfx.rlc.register_list_format =
                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
                                        adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

        if (!adev->gfx.rlc.register_list_format) {
                err = -ENOMEM;
                goto out;
        }

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
        for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
        for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
                err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
                if (err == -ENOENT) {
                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
                        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
                }
        } else {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
                err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        }
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ)) {
                if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
                        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                        if (err == -ENOENT) {
                                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
                                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                        }
                } else {
                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
                        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                }
                if (!err) {
                        err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
                        if (err)
                                goto out;
                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                                adev->gfx.mec2_fw->data;
                        adev->gfx.mec2_fw_version =
                                le32_to_cpu(cp_hdr->header.ucode_version);
                        adev->gfx.mec2_feature_version =
                                le32_to_cpu(cp_hdr->ucode_feature_version);
                } else {
                        err = 0;
                        adev->gfx.mec2_fw = NULL;
                }
        }

1176         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1177                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1178                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1179                 info->fw = adev->gfx.pfp_fw;
1180                 header = (const struct common_firmware_header *)info->fw->data;
1181                 adev->firmware.fw_size +=
1182                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1183
1184                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1185                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1186                 info->fw = adev->gfx.me_fw;
1187                 header = (const struct common_firmware_header *)info->fw->data;
1188                 adev->firmware.fw_size +=
1189                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1190
1191                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1192                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1193                 info->fw = adev->gfx.ce_fw;
1194                 header = (const struct common_firmware_header *)info->fw->data;
1195                 adev->firmware.fw_size +=
1196                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1197
1198                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1199                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1200                 info->fw = adev->gfx.rlc_fw;
1201                 header = (const struct common_firmware_header *)info->fw->data;
1202                 adev->firmware.fw_size +=
1203                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1204
1205                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1206                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1207                 info->fw = adev->gfx.mec_fw;
1208                 header = (const struct common_firmware_header *)info->fw->data;
1209                 adev->firmware.fw_size +=
1210                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1211
1212                 /* we also need to account for the JT (jump table) */
1213                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1214                 adev->firmware.fw_size +=
1215                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1216
1217                 if (amdgpu_sriov_vf(adev)) {
1218                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1219                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1220                         info->fw = adev->gfx.mec_fw;
1221                         adev->firmware.fw_size +=
1222                                 ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
1223                 }
1224
1225                 if (adev->gfx.mec2_fw) {
1226                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1227                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1228                         info->fw = adev->gfx.mec2_fw;
1229                         header = (const struct common_firmware_header *)info->fw->data;
1230                         adev->firmware.fw_size +=
1231                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1232                 }
1233
1234         }
1235
1236 out:
1237         if (err) {
1238                 dev_err(adev->dev,
1239                         "gfx8: Failed to load firmware \"%s\"\n",
1240                         fw_name);
1241                 release_firmware(adev->gfx.pfp_fw);
1242                 adev->gfx.pfp_fw = NULL;
1243                 release_firmware(adev->gfx.me_fw);
1244                 adev->gfx.me_fw = NULL;
1245                 release_firmware(adev->gfx.ce_fw);
1246                 adev->gfx.ce_fw = NULL;
1247                 release_firmware(adev->gfx.rlc_fw);
1248                 adev->gfx.rlc_fw = NULL;
1249                 release_firmware(adev->gfx.mec_fw);
1250                 adev->gfx.mec_fw = NULL;
1251                 release_firmware(adev->gfx.mec2_fw);
1252                 adev->gfx.mec2_fw = NULL;
1253         }
1254         return err;
1255 }
1256
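/*
 * Build the clear-state indirect buffer (CSB): a PM4 stream that wraps the
 * SECT_CONTEXT register extents from rlc.cs_data between
 * PREAMBLE_BEGIN/END_CLEAR_STATE markers, patches in the SE0/SH0
 * PA_SC_RASTER_CONFIG values, and ends with a CLEAR_STATE packet.
 */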
1257 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1258                                     volatile u32 *buffer)
1259 {
1260         u32 count = 0, i;
1261         const struct cs_section_def *sect = NULL;
1262         const struct cs_extent_def *ext = NULL;
1263
1264         if (adev->gfx.rlc.cs_data == NULL)
1265                 return;
1266         if (buffer == NULL)
1267                 return;
1268
1269         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1270         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1271
1272         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1273         buffer[count++] = cpu_to_le32(0x80000000);
1274         buffer[count++] = cpu_to_le32(0x80000000);
1275
1276         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1277                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1278                         if (sect->id == SECT_CONTEXT) {
1279                                 buffer[count++] =
1280                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1281                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1282                                                 PACKET3_SET_CONTEXT_REG_START);
1283                                 for (i = 0; i < ext->reg_count; i++)
1284                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1285                         } else {
1286                                 return;
1287                         }
1288                 }
1289         }
1290
1291         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1292         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1293                         PACKET3_SET_CONTEXT_REG_START);
1294         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1295         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1296
1297         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1298         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1299
1300         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1301         buffer[count++] = cpu_to_le32(0);
1302 }
1303
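/*
 * Pack the CP jump tables into the RLC cp_table BO, back to back: the
 * jt_offset/jt_size window of each ucode image, in the fixed order
 * CE, PFP, ME, MEC and (on Carrizo only) MEC2.
 */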
1304 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1305 {
1306         const __le32 *fw_data;
1307         volatile u32 *dst_ptr;
1308         int me, i, max_me = 4;
1309         u32 bo_offset = 0;
1310         u32 table_offset, table_size;
1311
1312         if (adev->asic_type == CHIP_CARRIZO)
1313                 max_me = 5;
1314
1315         /* write the cp table buffer */
1316         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1317         for (me = 0; me < max_me; me++) {
1318                 if (me == 0) {
1319                         const struct gfx_firmware_header_v1_0 *hdr =
1320                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1321                         fw_data = (const __le32 *)
1322                                 (adev->gfx.ce_fw->data +
1323                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1324                         table_offset = le32_to_cpu(hdr->jt_offset);
1325                         table_size = le32_to_cpu(hdr->jt_size);
1326                 } else if (me == 1) {
1327                         const struct gfx_firmware_header_v1_0 *hdr =
1328                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1329                         fw_data = (const __le32 *)
1330                                 (adev->gfx.pfp_fw->data +
1331                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1332                         table_offset = le32_to_cpu(hdr->jt_offset);
1333                         table_size = le32_to_cpu(hdr->jt_size);
1334                 } else if (me == 2) {
1335                         const struct gfx_firmware_header_v1_0 *hdr =
1336                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1337                         fw_data = (const __le32 *)
1338                                 (adev->gfx.me_fw->data +
1339                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1340                         table_offset = le32_to_cpu(hdr->jt_offset);
1341                         table_size = le32_to_cpu(hdr->jt_size);
1342                 } else if (me == 3) {
1343                         const struct gfx_firmware_header_v1_0 *hdr =
1344                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1345                         fw_data = (const __le32 *)
1346                                 (adev->gfx.mec_fw->data +
1347                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1348                         table_offset = le32_to_cpu(hdr->jt_offset);
1349                         table_size = le32_to_cpu(hdr->jt_size);
1350                 } else if (me == 4) {
1351                         const struct gfx_firmware_header_v1_0 *hdr =
1352                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1353                         fw_data = (const __le32 *)
1354                                 (adev->gfx.mec2_fw->data +
1355                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1356                         table_offset = le32_to_cpu(hdr->jt_offset);
1357                         table_size = le32_to_cpu(hdr->jt_size);
1358                 }
1359
1360                 for (i = 0; i < table_size; i++) {
1361                         dst_ptr[bo_offset + i] =
1362                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1363                 }
1364
1365                 bo_offset += table_size;
1366         }
1367 }
1368
1369 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1370 {
1371         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1372         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
1373 }
1374
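/*
 * Allocate the RLC buffers: a VRAM BO holding the clear-state stream built
 * by gfx_v8_0_get_csb_buffer(), plus, on Carrizo/Stoney, a cp_table BO
 * sized for the CP jump tables and the GDS backup area.
 */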
1375 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1376 {
1377         volatile u32 *dst_ptr;
1378         u32 dws;
1379         const struct cs_section_def *cs_data;
1380         int r;
1381
1382         adev->gfx.rlc.cs_data = vi_cs_data;
1383
1384         cs_data = adev->gfx.rlc.cs_data;
1385
1386         if (cs_data) {
1387                 /* clear state block */
1388                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1389
1390                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1391                                               AMDGPU_GEM_DOMAIN_VRAM,
1392                                               &adev->gfx.rlc.clear_state_obj,
1393                                               &adev->gfx.rlc.clear_state_gpu_addr,
1394                                               (void **)&adev->gfx.rlc.cs_ptr);
1395                 if (r) {
1396                         dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
1397                         gfx_v8_0_rlc_fini(adev);
1398                         return r;
1399                 }
1400
1401                 /* set up the cs buffer */
1402                 dst_ptr = adev->gfx.rlc.cs_ptr;
1403                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1404                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1405                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1406         }
1407
1408         if ((adev->asic_type == CHIP_CARRIZO) ||
1409             (adev->asic_type == CHIP_STONEY)) {
1410                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1411                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1412                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1413                                               &adev->gfx.rlc.cp_table_obj,
1414                                               &adev->gfx.rlc.cp_table_gpu_addr,
1415                                               (void **)&adev->gfx.rlc.cp_table_ptr);
1416                 if (r) {
1417                         dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1418                         return r;
1419                 }
1420
1421                 cz_init_cp_jump_table(adev);
1422
1423                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1424                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1425         }
1426
1427         return 0;
1428 }
1429
1430 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1431 {
1432         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1433 }
1434
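/*
 * Take ownership of the compute queues this driver will use and allocate
 * the MEC EOP buffer: one GFX8_MEC_HPD_SIZE slot per enabled compute ring
 * in a single zero-filled GTT BO.
 */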
1435 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1436 {
1437         int r;
1438         u32 *hpd;
1439         size_t mec_hpd_size;
1440
1441         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1442
1443         /* take ownership of the relevant compute queues */
1444         amdgpu_gfx_compute_queue_acquire(adev);
1445
1446         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1447
1448         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1449                                       AMDGPU_GEM_DOMAIN_GTT,
1450                                       &adev->gfx.mec.hpd_eop_obj,
1451                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1452                                       (void **)&hpd);
1453         if (r) {
1454                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1455                 return r;
1456         }
1457
1458         memset(hpd, 0, mec_hpd_size);
1459
1460         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1461         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1462
1463         return 0;
1464 }
1465
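/*
 * Raw GCN3 machine code for the two throw-away compute shaders used by
 * gfx_v8_0_do_edc_gpr_workarounds(): the first writes every VGPR, the
 * second every SGPR, so the EDC logic starts from fully initialized GPRs.
 */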
1466 static const u32 vgpr_init_compute_shader[] =
1467 {
1468         0x7e000209, 0x7e020208,
1469         0x7e040207, 0x7e060206,
1470         0x7e080205, 0x7e0a0204,
1471         0x7e0c0203, 0x7e0e0202,
1472         0x7e100201, 0x7e120200,
1473         0x7e140209, 0x7e160208,
1474         0x7e180207, 0x7e1a0206,
1475         0x7e1c0205, 0x7e1e0204,
1476         0x7e200203, 0x7e220202,
1477         0x7e240201, 0x7e260200,
1478         0x7e280209, 0x7e2a0208,
1479         0x7e2c0207, 0x7e2e0206,
1480         0x7e300205, 0x7e320204,
1481         0x7e340203, 0x7e360202,
1482         0x7e380201, 0x7e3a0200,
1483         0x7e3c0209, 0x7e3e0208,
1484         0x7e400207, 0x7e420206,
1485         0x7e440205, 0x7e460204,
1486         0x7e480203, 0x7e4a0202,
1487         0x7e4c0201, 0x7e4e0200,
1488         0x7e500209, 0x7e520208,
1489         0x7e540207, 0x7e560206,
1490         0x7e580205, 0x7e5a0204,
1491         0x7e5c0203, 0x7e5e0202,
1492         0x7e600201, 0x7e620200,
1493         0x7e640209, 0x7e660208,
1494         0x7e680207, 0x7e6a0206,
1495         0x7e6c0205, 0x7e6e0204,
1496         0x7e700203, 0x7e720202,
1497         0x7e740201, 0x7e760200,
1498         0x7e780209, 0x7e7a0208,
1499         0x7e7c0207, 0x7e7e0206,
1500         0xbf8a0000, 0xbf810000,
1501 };
1502
1503 static const u32 sgpr_init_compute_shader[] =
1504 {
1505         0xbe8a0100, 0xbe8c0102,
1506         0xbe8e0104, 0xbe900106,
1507         0xbe920108, 0xbe940100,
1508         0xbe960102, 0xbe980104,
1509         0xbe9a0106, 0xbe9c0108,
1510         0xbe9e0100, 0xbea00102,
1511         0xbea20104, 0xbea40106,
1512         0xbea60108, 0xbea80100,
1513         0xbeaa0102, 0xbeac0104,
1514         0xbeae0106, 0xbeb00108,
1515         0xbeb20100, 0xbeb40102,
1516         0xbeb60104, 0xbeb80106,
1517         0xbeba0108, 0xbebc0100,
1518         0xbebe0102, 0xbec00104,
1519         0xbec20106, 0xbec40108,
1520         0xbec60100, 0xbec80102,
1521         0xbee60004, 0xbee70005,
1522         0xbeea0006, 0xbeeb0007,
1523         0xbee80008, 0xbee90009,
1524         0xbefc0000, 0xbf8a0000,
1525         0xbf810000, 0x00000000,
1526 };
1527
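/*
 * SET_SH_REG register/value pairs written before each EDC dispatch:
 * thread-group dimensions, the COMPUTE_PGM_RSRC1/2 GPR allocation, and a
 * recognizable 0xedcedcXX pattern in the USER_DATA registers.
 */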
1528 static const u32 vgpr_init_regs[] =
1529 {
1530         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1531         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1532         mmCOMPUTE_NUM_THREAD_X, 256*4,
1533         mmCOMPUTE_NUM_THREAD_Y, 1,
1534         mmCOMPUTE_NUM_THREAD_Z, 1,
1535         mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1536         mmCOMPUTE_PGM_RSRC2, 20,
1537         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1538         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1539         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1540         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1541         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1542         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1543         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1544         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1545         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1546         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1547 };
1548
1549 static const u32 sgpr1_init_regs[] =
1550 {
1551         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1552         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1553         mmCOMPUTE_NUM_THREAD_X, 256*5,
1554         mmCOMPUTE_NUM_THREAD_Y, 1,
1555         mmCOMPUTE_NUM_THREAD_Z, 1,
1556         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1557         mmCOMPUTE_PGM_RSRC2, 20,
1558         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1559         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1560         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1561         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1562         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1563         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1564         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1565         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1566         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1567         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1568 };
1569
1570 static const u32 sgpr2_init_regs[] =
1571 {
1572         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1573         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1574         mmCOMPUTE_NUM_THREAD_X, 256*5,
1575         mmCOMPUTE_NUM_THREAD_Y, 1,
1576         mmCOMPUTE_NUM_THREAD_Z, 1,
1577         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1578         mmCOMPUTE_PGM_RSRC2, 20,
1579         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1580         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1581         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1582         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1583         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1584         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1585         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1586         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1587         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1588         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1589 };
1590
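/*
 * SEC/DED (single-error-correct/double-error-detect) counter registers;
 * gfx_v8_0_do_edc_gpr_workarounds() reads each one back to clear it.
 */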
1591 static const u32 sec_ded_counter_registers[] =
1592 {
1593         mmCPC_EDC_ATC_CNT,
1594         mmCPC_EDC_SCRATCH_CNT,
1595         mmCPC_EDC_UCODE_CNT,
1596         mmCPF_EDC_ATC_CNT,
1597         mmCPF_EDC_ROQ_CNT,
1598         mmCPF_EDC_TAG_CNT,
1599         mmCPG_EDC_ATC_CNT,
1600         mmCPG_EDC_DMA_CNT,
1601         mmCPG_EDC_TAG_CNT,
1602         mmDC_EDC_CSINVOC_CNT,
1603         mmDC_EDC_RESTORE_CNT,
1604         mmDC_EDC_STATE_CNT,
1605         mmGDS_EDC_CNT,
1606         mmGDS_EDC_GRBM_CNT,
1607         mmGDS_EDC_OA_DED,
1608         mmSPI_EDC_CNT,
1609         mmSQC_ATC_EDC_GATCL1_CNT,
1610         mmSQC_EDC_CNT,
1611         mmSQ_EDC_DED_CNT,
1612         mmSQ_EDC_INFO,
1613         mmSQ_EDC_SEC_CNT,
1614         mmTCC_EDC_CNT,
1615         mmTCP_ATC_EDC_GATCL1_CNT,
1616         mmTCP_EDC_CNT,
1617         mmTD_EDC_CNT
1618 };
1619
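/*
 * Carrizo-only EDC workaround: dispatch the VGPR shader once and the SGPR
 * shader twice with complementary CU masks so every GPR gets written, then
 * enable DED/FED reporting in GB_EDC_MODE and clear the counters by
 * reading them back.
 */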
1620 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1621 {
1622         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1623         struct amdgpu_ib ib;
1624         struct dma_fence *f = NULL;
1625         int r, i;
1626         u32 tmp;
1627         unsigned total_size, vgpr_offset, sgpr_offset;
1628         u64 gpu_addr;
1629
1630         /* only supported on CZ */
1631         if (adev->asic_type != CHIP_CARRIZO)
1632                 return 0;
1633
1634         /* bail if the compute ring is not ready */
1635         if (!ring->ready)
1636                 return 0;
1637
1638         tmp = RREG32(mmGB_EDC_MODE);
1639         WREG32(mmGB_EDC_MODE, 0);
1640
1641         total_size =
1642                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1643         total_size +=
1644                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1645         total_size +=
1646                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1647         total_size = ALIGN(total_size, 256);
1648         vgpr_offset = total_size;
1649         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1650         sgpr_offset = total_size;
1651         total_size += sizeof(sgpr_init_compute_shader);
1652
1653         /* allocate an indirect buffer to put the commands in */
1654         memset(&ib, 0, sizeof(ib));
1655         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1656         if (r) {
1657                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1658                 return r;
1659         }
1660
1661         /* load the compute shaders */
1662         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1663                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1664
1665         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1666                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1667
1668         /* init the ib length to 0 */
1669         ib.length_dw = 0;
1670
1671         /* VGPR */
1672         /* write the register state for the compute dispatch */
1673         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1674                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1675                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1676                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1677         }
1678         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1679         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1680         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1681         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1682         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1683         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1684
1685         /* write dispatch packet */
1686         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1687         ib.ptr[ib.length_dw++] = 8; /* x */
1688         ib.ptr[ib.length_dw++] = 1; /* y */
1689         ib.ptr[ib.length_dw++] = 1; /* z */
1690         ib.ptr[ib.length_dw++] =
1691                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1692
1693         /* write CS partial flush packet */
1694         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1695         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1696
1697         /* SGPR1 */
1698         /* write the register state for the compute dispatch */
1699         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1700                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1701                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1702                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1703         }
1704         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1705         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1706         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1707         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1708         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1709         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1710
1711         /* write dispatch packet */
1712         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1713         ib.ptr[ib.length_dw++] = 8; /* x */
1714         ib.ptr[ib.length_dw++] = 1; /* y */
1715         ib.ptr[ib.length_dw++] = 1; /* z */
1716         ib.ptr[ib.length_dw++] =
1717                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1718
1719         /* write CS partial flush packet */
1720         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1721         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1722
1723         /* SGPR2 */
1724         /* write the register state for the compute dispatch */
1725         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1726                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1727                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1728                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1729         }
1730         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1731         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1732         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1733         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1734         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1735         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1736
1737         /* write dispatch packet */
1738         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1739         ib.ptr[ib.length_dw++] = 8; /* x */
1740         ib.ptr[ib.length_dw++] = 1; /* y */
1741         ib.ptr[ib.length_dw++] = 1; /* z */
1742         ib.ptr[ib.length_dw++] =
1743                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1744
1745         /* write CS partial flush packet */
1746         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1747         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1748
1749         /* schedule the ib on the ring */
1750         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1751         if (r) {
1752                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1753                 goto fail;
1754         }
1755
1756         /* wait for the GPU to finish processing the IB */
1757         r = dma_fence_wait(f, false);
1758         if (r) {
1759                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1760                 goto fail;
1761         }
1762
1763         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1764         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1765         WREG32(mmGB_EDC_MODE, tmp);
1766
1767         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1768         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1769         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1770
1771
1772         /* read back registers to clear the counters */
1773         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1774                 RREG32(sec_ded_counter_registers[i]);
1775
1776 fail:
1777         amdgpu_ib_free(adev, &ib, NULL);
1778         dma_fence_put(f);
1779
1780         return r;
1781 }
1782
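/*
 * Derive the per-ASIC gfx configuration: shader-engine/CU/backend topology
 * (hardcoded here, or read from atombios on Polaris/VegaM), the golden
 * GB_ADDR_CONFIG value, and the memory row size used to patch ROW_SIZE.
 */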
1783 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1784 {
1785         u32 gb_addr_config;
1786         u32 mc_shared_chmap, mc_arb_ramcfg;
1787         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1788         u32 tmp;
1789         int ret;
1790
1791         switch (adev->asic_type) {
1792         case CHIP_TOPAZ:
1793                 adev->gfx.config.max_shader_engines = 1;
1794                 adev->gfx.config.max_tile_pipes = 2;
1795                 adev->gfx.config.max_cu_per_sh = 6;
1796                 adev->gfx.config.max_sh_per_se = 1;
1797                 adev->gfx.config.max_backends_per_se = 2;
1798                 adev->gfx.config.max_texture_channel_caches = 2;
1799                 adev->gfx.config.max_gprs = 256;
1800                 adev->gfx.config.max_gs_threads = 32;
1801                 adev->gfx.config.max_hw_contexts = 8;
1802
1803                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1804                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1805                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1806                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1807                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1808                 break;
1809         case CHIP_FIJI:
1810                 adev->gfx.config.max_shader_engines = 4;
1811                 adev->gfx.config.max_tile_pipes = 16;
1812                 adev->gfx.config.max_cu_per_sh = 16;
1813                 adev->gfx.config.max_sh_per_se = 1;
1814                 adev->gfx.config.max_backends_per_se = 4;
1815                 adev->gfx.config.max_texture_channel_caches = 16;
1816                 adev->gfx.config.max_gprs = 256;
1817                 adev->gfx.config.max_gs_threads = 32;
1818                 adev->gfx.config.max_hw_contexts = 8;
1819
1820                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1821                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1822                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1823                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1824                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1825                 break;
1826         case CHIP_POLARIS11:
1827         case CHIP_POLARIS12:
1828                 ret = amdgpu_atombios_get_gfx_info(adev);
1829                 if (ret)
1830                         return ret;
1831                 adev->gfx.config.max_gprs = 256;
1832                 adev->gfx.config.max_gs_threads = 32;
1833                 adev->gfx.config.max_hw_contexts = 8;
1834
1835                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1836                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1837                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1838                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1839                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1840                 break;
1841         case CHIP_POLARIS10:
1842         case CHIP_VEGAM:
1843                 ret = amdgpu_atombios_get_gfx_info(adev);
1844                 if (ret)
1845                         return ret;
1846                 adev->gfx.config.max_gprs = 256;
1847                 adev->gfx.config.max_gs_threads = 32;
1848                 adev->gfx.config.max_hw_contexts = 8;
1849
1850                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1851                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1852                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1853                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1854                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1855                 break;
1856         case CHIP_TONGA:
1857                 adev->gfx.config.max_shader_engines = 4;
1858                 adev->gfx.config.max_tile_pipes = 8;
1859                 adev->gfx.config.max_cu_per_sh = 8;
1860                 adev->gfx.config.max_sh_per_se = 1;
1861                 adev->gfx.config.max_backends_per_se = 2;
1862                 adev->gfx.config.max_texture_channel_caches = 8;
1863                 adev->gfx.config.max_gprs = 256;
1864                 adev->gfx.config.max_gs_threads = 32;
1865                 adev->gfx.config.max_hw_contexts = 8;
1866
1867                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1868                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1869                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1870                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1871                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1872                 break;
1873         case CHIP_CARRIZO:
1874                 adev->gfx.config.max_shader_engines = 1;
1875                 adev->gfx.config.max_tile_pipes = 2;
1876                 adev->gfx.config.max_sh_per_se = 1;
1877                 adev->gfx.config.max_backends_per_se = 2;
1878                 adev->gfx.config.max_cu_per_sh = 8;
1879                 adev->gfx.config.max_texture_channel_caches = 2;
1880                 adev->gfx.config.max_gprs = 256;
1881                 adev->gfx.config.max_gs_threads = 32;
1882                 adev->gfx.config.max_hw_contexts = 8;
1883
1884                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1885                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1886                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1887                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1888                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1889                 break;
1890         case CHIP_STONEY:
1891                 adev->gfx.config.max_shader_engines = 1;
1892                 adev->gfx.config.max_tile_pipes = 2;
1893                 adev->gfx.config.max_sh_per_se = 1;
1894                 adev->gfx.config.max_backends_per_se = 1;
1895                 adev->gfx.config.max_cu_per_sh = 3;
1896                 adev->gfx.config.max_texture_channel_caches = 2;
1897                 adev->gfx.config.max_gprs = 256;
1898                 adev->gfx.config.max_gs_threads = 16;
1899                 adev->gfx.config.max_hw_contexts = 8;
1900
1901                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1902                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1903                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1904                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1905                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1906                 break;
1907         default:
1908                 adev->gfx.config.max_shader_engines = 2;
1909                 adev->gfx.config.max_tile_pipes = 4;
1910                 adev->gfx.config.max_cu_per_sh = 2;
1911                 adev->gfx.config.max_sh_per_se = 1;
1912                 adev->gfx.config.max_backends_per_se = 2;
1913                 adev->gfx.config.max_texture_channel_caches = 4;
1914                 adev->gfx.config.max_gprs = 256;
1915                 adev->gfx.config.max_gs_threads = 32;
1916                 adev->gfx.config.max_hw_contexts = 8;
1917
1918                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1919                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1920                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1921                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1922                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1923                 break;
1924         }
1925
1926         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1927         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1928         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1929
1930         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1931         adev->gfx.config.mem_max_burst_length_bytes = 256;
1932         if (adev->flags & AMD_IS_APU) {
1933                 /* Get memory bank mapping mode. */
1934                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1935                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1936                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1937
1938                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1939                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1940                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1941
1942                 /* Validate settings in case only one DIMM is installed. */
1943                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1944                         dimm00_addr_map = 0;
1945                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1946                         dimm01_addr_map = 0;
1947                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1948                         dimm10_addr_map = 0;
1949                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1950                         dimm11_addr_map = 0;
1951
1952                 /* If the DIMM address map is 8GB, the row size should be 2KB. Otherwise 1KB. */
1953                 /* If ROW size(DIMM1) != ROW size(DIMM0), the row size should be the larger one. */
1954                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1955                         adev->gfx.config.mem_row_size_in_kb = 2;
1956                 else
1957                         adev->gfx.config.mem_row_size_in_kb = 1;
1958         } else {
1959                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1960                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1961                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1962                         adev->gfx.config.mem_row_size_in_kb = 4;
1963         }
1964
1965         adev->gfx.config.shader_engine_tile_size = 32;
1966         adev->gfx.config.num_gpus = 1;
1967         adev->gfx.config.multi_gpu_tile_size = 64;
1968
1969         /* fix up row size */
1970         switch (adev->gfx.config.mem_row_size_in_kb) {
1971         case 1:
1972         default:
1973                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1974                 break;
1975         case 2:
1976                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1977                 break;
1978         case 4:
1979                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1980                 break;
1981         }
1982         adev->gfx.config.gb_addr_config = gb_addr_config;
1983
1984         return 0;
1985 }
1986
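/*
 * Initialize one compute ring: map (mec, pipe, queue) onto it, assign a
 * doorbell and an EOP slot in the shared HPD BO, and route its interrupt
 * to the EOP source of the owning MEC pipe.
 */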
1987 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1988                                         int mec, int pipe, int queue)
1989 {
1990         int r;
1991         unsigned irq_type;
1992         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1995
1996         /* mec0 is me1 */
1997         ring->me = mec + 1;
1998         ring->pipe = pipe;
1999         ring->queue = queue;
2000
2001         ring->ring_obj = NULL;
2002         ring->use_doorbell = true;
2003         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
2004         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2005                                 + (ring_id * GFX8_MEC_HPD_SIZE);
2006         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2007
2008         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2009                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2010                 + ring->pipe;
2011
2012         /* type-2 packets are deprecated on MEC, use type-3 instead */
2013         r = amdgpu_ring_init(adev, ring, 1024,
2014                         &adev->gfx.eop_irq, irq_type);
2015         if (r)
2016                 return r;
2017
2018
2019         return 0;
2020 }
2021
2022 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
2023
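/*
 * sw_init: register the KIQ/EOP/fault/ECC/SQ interrupt sources, load the
 * microcode, create the RLC/MEC/KIQ BOs, then bring up the gfx ring and
 * every enabled compute ring together with their MQDs and the GDS, GWS
 * and OA reservations.
 */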
2024 static int gfx_v8_0_sw_init(void *handle)
2025 {
2026         int i, j, k, r, ring_id;
2027         struct amdgpu_ring *ring;
2028         struct amdgpu_kiq *kiq;
2029         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2030
2031         switch (adev->asic_type) {
2032         case CHIP_TONGA:
2033         case CHIP_CARRIZO:
2034         case CHIP_FIJI:
2035         case CHIP_POLARIS10:
2036         case CHIP_POLARIS11:
2037         case CHIP_POLARIS12:
2038         case CHIP_VEGAM:
2039                 adev->gfx.mec.num_mec = 2;
2040                 break;
2041         case CHIP_TOPAZ:
2042         case CHIP_STONEY:
2043         default:
2044                 adev->gfx.mec.num_mec = 1;
2045                 break;
2046         }
2047
2048         adev->gfx.mec.num_pipe_per_mec = 4;
2049         adev->gfx.mec.num_queue_per_pipe = 8;
2050
2051         /* KIQ event */
2052         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq);
2053         if (r)
2054                 return r;
2055
2056         /* EOP Event */
2057         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
2058         if (r)
2059                 return r;
2060
2061         /* Privileged reg */
2062         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
2063                               &adev->gfx.priv_reg_irq);
2064         if (r)
2065                 return r;
2066
2067         /* Privileged inst */
2068         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
2069                               &adev->gfx.priv_inst_irq);
2070         if (r)
2071                 return r;
2072
2073         /* Add CP EDC/ECC irq  */
2074         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
2075                               &adev->gfx.cp_ecc_error_irq);
2076         if (r)
2077                 return r;
2078
2079         /* SQ interrupts. */
2080         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
2081                               &adev->gfx.sq_irq);
2082         if (r) {
2083                 DRM_ERROR("amdgpu_irq_add_id() for SQ failed: %d\n", r);
2084                 return r;
2085         }
2086
2087         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2088
2089         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2090
2091         gfx_v8_0_scratch_init(adev);
2092
2093         r = gfx_v8_0_init_microcode(adev);
2094         if (r) {
2095                 DRM_ERROR("Failed to load gfx firmware!\n");
2096                 return r;
2097         }
2098
2099         r = gfx_v8_0_rlc_init(adev);
2100         if (r) {
2101                 DRM_ERROR("Failed to init rlc BOs!\n");
2102                 return r;
2103         }
2104
2105         r = gfx_v8_0_mec_init(adev);
2106         if (r) {
2107                 DRM_ERROR("Failed to init MEC BOs!\n");
2108                 return r;
2109         }
2110
2111         /* set up the gfx ring */
2112         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2113                 ring = &adev->gfx.gfx_ring[i];
2114                 ring->ring_obj = NULL;
2115                 sprintf(ring->name, "gfx");
2116                 /* no gfx doorbells on iceland */
2117                 if (adev->asic_type != CHIP_TOPAZ) {
2118                         ring->use_doorbell = true;
2119                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2120                 }
2121
2122                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2123                                      AMDGPU_CP_IRQ_GFX_EOP);
2124                 if (r)
2125                         return r;
2126         }
2127
2128
2129         /* set up the compute queues - allocate horizontally across pipes */
2130         ring_id = 0;
2131         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2132                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2133                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2134                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2135                                         continue;
2136
2137                                 r = gfx_v8_0_compute_ring_init(adev,
2138                                                                 ring_id,
2139                                                                 i, k, j);
2140                                 if (r)
2141                                         return r;
2142
2143                                 ring_id++;
2144                         }
2145                 }
2146         }
2147
2148         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2149         if (r) {
2150                 DRM_ERROR("Failed to init KIQ BOs!\n");
2151                 return r;
2152         }
2153
2154         kiq = &adev->gfx.kiq;
2155         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2156         if (r)
2157                 return r;
2158
2159         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2160         r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2161         if (r)
2162                 return r;
2163
2164         /* reserve GDS, GWS and OA resource for gfx */
2165         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2166                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2167                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2168         if (r)
2169                 return r;
2170
2171         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2172                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2173                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2174         if (r)
2175                 return r;
2176
2177         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2178                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2179                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2180         if (r)
2181                 return r;
2182
2183         adev->gfx.ce_ram_size = 0x8000;
2184
2185         r = gfx_v8_0_gpu_early_init(adev);
2186         if (r)
2187                 return r;
2188
2189         return 0;
2190 }
2191
2192 static int gfx_v8_0_sw_fini(void *handle)
2193 {
2194         int i;
2195         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2196
2197         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2198         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2199         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2200
2201         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2202                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2203         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2204                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2205
2206         amdgpu_gfx_compute_mqd_sw_fini(adev);
2207         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2208         amdgpu_gfx_kiq_fini(adev);
2209
2210         gfx_v8_0_mec_fini(adev);
2211         gfx_v8_0_rlc_fini(adev);
2212         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2213                                 &adev->gfx.rlc.clear_state_gpu_addr,
2214                                 (void **)&adev->gfx.rlc.cs_ptr);
2215         if ((adev->asic_type == CHIP_CARRIZO) ||
2216             (adev->asic_type == CHIP_STONEY)) {
2217                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2218                                 &adev->gfx.rlc.cp_table_gpu_addr,
2219                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2220         }
2221         gfx_v8_0_free_microcode(adev);
2222
2223         return 0;
2224 }
2225
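/*
 * Fill the tile_mode_array and macrotile_mode_array caches with the
 * per-ASIC GB_TILE_MODEn and GB_MACROTILE_MODEn encodings (array mode,
 * pipe config, tile split, bank geometry) later programmed into hardware.
 */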
2226 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2227 {
2228         uint32_t *modearray, *mod2array;
2229         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2230         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2231         u32 reg_offset;
2232
2233         modearray = adev->gfx.config.tile_mode_array;
2234         mod2array = adev->gfx.config.macrotile_mode_array;
2235
2236         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2237                 modearray[reg_offset] = 0;
2238
2239         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2240                 mod2array[reg_offset] = 0;
2241
2242         switch (adev->asic_type) {
2243         case CHIP_TOPAZ:
2244                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245                                 PIPE_CONFIG(ADDR_SURF_P2) |
2246                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2247                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2249                                 PIPE_CONFIG(ADDR_SURF_P2) |
2250                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2251                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2252                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253                                 PIPE_CONFIG(ADDR_SURF_P2) |
2254                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2255                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2257                                 PIPE_CONFIG(ADDR_SURF_P2) |
2258                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2259                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2260                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261                                 PIPE_CONFIG(ADDR_SURF_P2) |
2262                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2263                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2264                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2265                                 PIPE_CONFIG(ADDR_SURF_P2) |
2266                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2267                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2268                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2269                                 PIPE_CONFIG(ADDR_SURF_P2) |
2270                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

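		/* Macrotile half of the table: bank width/height, macro tile
		 * aspect and bank count for the tile modes above.  Entry 7 is
		 * left unset and is skipped by the write loop below.
		 */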
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

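		/* Commit both tables.  GB_TILE_MODE 7, 12, 17 and 23 are
		 * deliberately skipped and keep their existing values.
		 */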
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
	case CHIP_VEGAM:
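		/* Fiji and VegaM use the 16-pipe P16_32x32_16x16 configuration;
		 * only the second PRT (partially resident texture) variant of
		 * each group (modes 7, 12, 17, 23 and 30) drops back to the
		 * narrower P4_16x16 layout.
		 */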
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

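		/* Matching macrotile table: bank width is 1 for every entry;
		 * only bank height, macro aspect and bank count vary.
		 */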
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

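		/* No tile modes are skipped on this config, so the whole table
		 * is written in one pass; macrotile entry 7 is still left alone.
		 */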
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
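		/* Tonga: 8-pipe P8_32x32_16x16 configuration with the same
		 * P4_16x16 fallback for modes 7, 12, 17, 23 and 30.
		 */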
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

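		/* Tonga macrotile table: 16 banks everywhere except the last
		 * three entries, which drop to 8 and then 4 banks.
		 */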
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
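		/* Polaris11/12 run the 4-pipe P4_16x16 configuration for every
		 * tile mode, PRT modes included, so no per-mode pipe-config
		 * fallback is needed.
		 */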
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

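		/* Polaris11/12 macrotile table.  Entries 8 and 9 are the only
		 * ones using BANK_WIDTH_2; all other entries keep width 1.
		 */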
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
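		/* Polaris10 mirrors the Tonga programming above: 8-pipe
		 * P8_32x32_16x16 with the P4_16x16 fallback for modes 7, 12,
		 * 17, 23 and 30.
		 */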
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

3120                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3121                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3122                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3123                                 NUM_BANKS(ADDR_SURF_16_BANK));
3124
3125                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3126                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3127                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3128                                 NUM_BANKS(ADDR_SURF_16_BANK));
3129
3130                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3131                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3132                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3133                                 NUM_BANKS(ADDR_SURF_16_BANK));
3134
3135                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138                                 NUM_BANKS(ADDR_SURF_16_BANK));
3139
3140                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3141                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3142                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3143                                 NUM_BANKS(ADDR_SURF_16_BANK));
3144
3145                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3146                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3147                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3148                                 NUM_BANKS(ADDR_SURF_16_BANK));
3149
3150                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3151                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3152                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3153                                 NUM_BANKS(ADDR_SURF_16_BANK));
3154
3155                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3156                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3157                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3158                                 NUM_BANKS(ADDR_SURF_16_BANK));
3159
3160                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3161                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3162                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3163                                 NUM_BANKS(ADDR_SURF_16_BANK));
3164
3165                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3166                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3167                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3168                                 NUM_BANKS(ADDR_SURF_16_BANK));
3169
3170                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3171                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3172                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3173                                 NUM_BANKS(ADDR_SURF_16_BANK));
3174
3175                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3176                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3177                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3178                                 NUM_BANKS(ADDR_SURF_8_BANK));
3179
3180                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3182                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3183                                 NUM_BANKS(ADDR_SURF_4_BANK));
3184
3185                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3186                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3187                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3188                                 NUM_BANKS(ADDR_SURF_4_BANK));
3189
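                /*
                 * Push the assembled tables out; GB_TILE_MODE0 and
                 * GB_MACROTILE_MODE0 are the first of consecutive
                 * per-index registers.  Index 7 of the macrotile set
                 * is skipped, matching the unprogrammed mod2array[7].
                 */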
3190                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3191                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3192
3193                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3194                         if (reg_offset != 7)
3195                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3196
3197                 break;
3198         case CHIP_STONEY:
3199                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200                                 PIPE_CONFIG(ADDR_SURF_P2) |
3201                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3202                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3203                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204                                 PIPE_CONFIG(ADDR_SURF_P2) |
3205                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3206                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3207                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3208                                 PIPE_CONFIG(ADDR_SURF_P2) |
3209                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3210                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3211                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3212                                 PIPE_CONFIG(ADDR_SURF_P2) |
3213                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3214                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3215                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3216                                 PIPE_CONFIG(ADDR_SURF_P2) |
3217                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3218                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3219                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3220                                 PIPE_CONFIG(ADDR_SURF_P2) |
3221                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3222                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3223                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224                                 PIPE_CONFIG(ADDR_SURF_P2) |
3225                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3226                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3227                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3228                                 PIPE_CONFIG(ADDR_SURF_P2));
3229                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3230                                 PIPE_CONFIG(ADDR_SURF_P2) |
3231                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3232                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3234                                  PIPE_CONFIG(ADDR_SURF_P2) |
3235                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3236                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3237                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238                                  PIPE_CONFIG(ADDR_SURF_P2) |
3239                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3240                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3241                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3242                                  PIPE_CONFIG(ADDR_SURF_P2) |
3243                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3244                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3245                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3246                                  PIPE_CONFIG(ADDR_SURF_P2) |
3247                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3248                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3249                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3250                                  PIPE_CONFIG(ADDR_SURF_P2) |
3251                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3252                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3253                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3254                                  PIPE_CONFIG(ADDR_SURF_P2) |
3255                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3257                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3258                                  PIPE_CONFIG(ADDR_SURF_P2) |
3259                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3260                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3261                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3262                                  PIPE_CONFIG(ADDR_SURF_P2) |
3263                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3264                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3265                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3266                                  PIPE_CONFIG(ADDR_SURF_P2) |
3267                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3268                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3269                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3270                                  PIPE_CONFIG(ADDR_SURF_P2) |
3271                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3272                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3273                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3274                                  PIPE_CONFIG(ADDR_SURF_P2) |
3275                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3276                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3277                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3278                                  PIPE_CONFIG(ADDR_SURF_P2) |
3279                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3280                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3281                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3282                                  PIPE_CONFIG(ADDR_SURF_P2) |
3283                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3284                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3285                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3286                                  PIPE_CONFIG(ADDR_SURF_P2) |
3287                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3288                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3289                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3290                                  PIPE_CONFIG(ADDR_SURF_P2) |
3291                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3292                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3294                                  PIPE_CONFIG(ADDR_SURF_P2) |
3295                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3296                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3297                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3298                                  PIPE_CONFIG(ADDR_SURF_P2) |
3299                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3300                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3301
3302                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3303                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3304                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3305                                 NUM_BANKS(ADDR_SURF_8_BANK));
3306                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3307                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3308                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3309                                 NUM_BANKS(ADDR_SURF_8_BANK));
3310                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3312                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3313                                 NUM_BANKS(ADDR_SURF_8_BANK));
3314                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3315                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3316                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3317                                 NUM_BANKS(ADDR_SURF_8_BANK));
3318                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3319                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3320                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3321                                 NUM_BANKS(ADDR_SURF_8_BANK));
3322                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3325                                 NUM_BANKS(ADDR_SURF_8_BANK));
3326                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3327                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3328                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3329                                 NUM_BANKS(ADDR_SURF_8_BANK));
3330                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3331                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3332                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3333                                 NUM_BANKS(ADDR_SURF_16_BANK));
3334                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3335                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3336                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337                                 NUM_BANKS(ADDR_SURF_16_BANK));
3338                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3339                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3340                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3341                                  NUM_BANKS(ADDR_SURF_16_BANK));
3342                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3343                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3344                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3345                                  NUM_BANKS(ADDR_SURF_16_BANK));
3346                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3347                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3348                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349                                  NUM_BANKS(ADDR_SURF_16_BANK));
3350                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3351                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3352                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3353                                  NUM_BANKS(ADDR_SURF_16_BANK));
3354                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3355                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3356                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3357                                  NUM_BANKS(ADDR_SURF_8_BANK));
3358
3359                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3360                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3361                             reg_offset != 23)
3362                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3363
3364                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3365                         if (reg_offset != 7)
3366                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3367
3368                 break;
3369         default:
3370                 dev_warn(adev->dev,
3371                          "Unknown chip type (%d) in gfx_v8_0_tiling_mode_table_init(), falling through to CHIP_CARRIZO\n",
3372                          adev->asic_type);
3373
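                /* fall through */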
3374         case CHIP_CARRIZO:
3375                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3376                                 PIPE_CONFIG(ADDR_SURF_P2) |
3377                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3378                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3379                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3380                                 PIPE_CONFIG(ADDR_SURF_P2) |
3381                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3382                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3383                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3384                                 PIPE_CONFIG(ADDR_SURF_P2) |
3385                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3386                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3387                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3388                                 PIPE_CONFIG(ADDR_SURF_P2) |
3389                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3390                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3391                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3392                                 PIPE_CONFIG(ADDR_SURF_P2) |
3393                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3394                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3395                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3396                                 PIPE_CONFIG(ADDR_SURF_P2) |
3397                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3398                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3399                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3400                                 PIPE_CONFIG(ADDR_SURF_P2) |
3401                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3402                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3403                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3404                                 PIPE_CONFIG(ADDR_SURF_P2));
3405                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3406                                 PIPE_CONFIG(ADDR_SURF_P2) |
3407                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3408                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3409                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3410                                  PIPE_CONFIG(ADDR_SURF_P2) |
3411                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3412                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3413                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3414                                  PIPE_CONFIG(ADDR_SURF_P2) |
3415                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3416                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3417                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3418                                  PIPE_CONFIG(ADDR_SURF_P2) |
3419                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3420                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3421                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3422                                  PIPE_CONFIG(ADDR_SURF_P2) |
3423                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3424                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3425                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3426                                  PIPE_CONFIG(ADDR_SURF_P2) |
3427                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3428                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3429                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3430                                  PIPE_CONFIG(ADDR_SURF_P2) |
3431                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3432                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3433                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3434                                  PIPE_CONFIG(ADDR_SURF_P2) |
3435                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3436                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3437                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3438                                  PIPE_CONFIG(ADDR_SURF_P2) |
3439                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3440                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3441                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3442                                  PIPE_CONFIG(ADDR_SURF_P2) |
3443                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3444                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3445                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3446                                  PIPE_CONFIG(ADDR_SURF_P2) |
3447                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3448                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3449                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3450                                  PIPE_CONFIG(ADDR_SURF_P2) |
3451                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3452                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3453                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3454                                  PIPE_CONFIG(ADDR_SURF_P2) |
3455                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3456                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3457                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3458                                  PIPE_CONFIG(ADDR_SURF_P2) |
3459                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3460                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3461                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3462                                  PIPE_CONFIG(ADDR_SURF_P2) |
3463                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3464                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3465                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3466                                  PIPE_CONFIG(ADDR_SURF_P2) |
3467                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3468                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3469                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3470                                  PIPE_CONFIG(ADDR_SURF_P2) |
3471                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3472                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3473                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3474                                  PIPE_CONFIG(ADDR_SURF_P2) |
3475                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3476                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3477
3478                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3479                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3480                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3481                                 NUM_BANKS(ADDR_SURF_8_BANK));
3482                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3483                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3484                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3485                                 NUM_BANKS(ADDR_SURF_8_BANK));
3486                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3487                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3488                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3489                                 NUM_BANKS(ADDR_SURF_8_BANK));
3490                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3491                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3492                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3493                                 NUM_BANKS(ADDR_SURF_8_BANK));
3494                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3495                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3496                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3497                                 NUM_BANKS(ADDR_SURF_8_BANK));
3498                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3499                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3500                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3501                                 NUM_BANKS(ADDR_SURF_8_BANK));
3502                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3503                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3504                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3505                                 NUM_BANKS(ADDR_SURF_8_BANK));
3506                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3507                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3508                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3509                                 NUM_BANKS(ADDR_SURF_16_BANK));
3510                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3511                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3512                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3513                                 NUM_BANKS(ADDR_SURF_16_BANK));
3514                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3515                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3516                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3517                                  NUM_BANKS(ADDR_SURF_16_BANK));
3518                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3519                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3520                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3521                                  NUM_BANKS(ADDR_SURF_16_BANK));
3522                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3523                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3524                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3525                                  NUM_BANKS(ADDR_SURF_16_BANK));
3526                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3527                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3528                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3529                                  NUM_BANKS(ADDR_SURF_16_BANK));
3530                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3531                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3532                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3533                                  NUM_BANKS(ADDR_SURF_8_BANK));
3534
3535                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3536                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3537                             reg_offset != 23)
3538                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3539
3540                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3541                         if (reg_offset != 7)
3542                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3543
3544                 break;
3545         }
3546 }
3547
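/*
 * Program GRBM_GFX_INDEX so that subsequent GRBM register accesses hit
 * one specific SE/SH/instance, or all of them at once.  Passing
 * 0xffffffff for a field requests broadcast instead of indexing, e.g.
 *
 *     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 *
 * reverts to broadcasting to every SE/SH/instance.  Callers serialize
 * against each other with adev->grbm_idx_mutex.
 */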
3548 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3549                                   u32 se_num, u32 sh_num, u32 instance)
3550 {
3551         u32 data;
3552
3553         if (instance == 0xffffffff)
3554                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3555         else
3556                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3557
3558         if (se_num == 0xffffffff)
3559                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3560         else
3561                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3562
3563         if (sh_num == 0xffffffff)
3564                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3565         else
3566                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3567
3568         WREG32(mmGRBM_GFX_INDEX, data);
3569 }
3570
3571 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3572                                   u32 me, u32 pipe, u32 q)
3573 {
3574         vi_srbm_select(adev, me, pipe, q, 0);
3575 }
3576
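/*
 * Return a bitmap of the render backends active on the currently
 * selected SE/SH: OR the fuse (CC) and driver (GC_USER) disable masks,
 * then invert under a mask sized to the number of RBs per SH.
 * Illustrative numbers only, not a real fuse readout: with 4 RBs per
 * SH the mask is 0xf, so a combined disable value of 0x2 gives an
 * active bitmap of 0xd.
 */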
3577 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3578 {
3579         u32 data, mask;
3580
3581         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3582                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3583
3584         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3585
3586         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3587                                          adev->gfx.config.max_sh_per_se);
3588
3589         return (~data) & mask;
3590 }
3591
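/*
 * Pick the default PA_SC_RASTER_CONFIG/_1 values for the fully
 * populated RB layout of each ASIC; parts with disabled RBs are
 * remapped by gfx_v8_0_write_harvested_raster_configs() instead.
 */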
3592 static void
3593 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3594 {
3595         switch (adev->asic_type) {
3596         case CHIP_FIJI:
3597         case CHIP_VEGAM:
3598                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3599                           RB_XSEL2(1) | PKR_MAP(2) |
3600                           PKR_XSEL(1) | PKR_YSEL(1) |
3601                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3602                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3603                            SE_PAIR_YSEL(2);
3604                 break;
3605         case CHIP_TONGA:
3606         case CHIP_POLARIS10:
3607                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3608                           SE_XSEL(1) | SE_YSEL(1);
3609                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3610                            SE_PAIR_YSEL(2);
3611                 break;
3612         case CHIP_TOPAZ:
3613         case CHIP_CARRIZO:
3614                 *rconf |= RB_MAP_PKR0(2);
3615                 *rconf1 |= 0x0;
3616                 break;
3617         case CHIP_POLARIS11:
3618         case CHIP_POLARIS12:
3619                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3620                           SE_XSEL(1) | SE_YSEL(1);
3621                 *rconf1 |= 0x0;
3622                 break;
3623         case CHIP_STONEY:
3624                 *rconf |= 0x0;
3625                 *rconf1 |= 0x0;
3626                 break;
3627         default:
3628                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3629                 break;
3630         }
3631 }
3632
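/*
 * Rewrite the raster config for parts with harvested RBs.  se_mask[n]
 * below is the slice of rb_mask belonging to shader engine n; for
 * example, with four SEs and rb_per_se == 4, se_mask[0] covers rb_mask
 * bits 0-3, se_mask[1] bits 4-7, and so on.  Empty packers, RB pairs
 * or SE pairs are then steered to their surviving partner.
 */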
3633 static void
3634 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3635                                         u32 raster_config, u32 raster_config_1,
3636                                         unsigned rb_mask, unsigned num_rb)
3637 {
3638         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3639         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3640         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3641         unsigned rb_per_se = num_rb / num_se;
3642         unsigned se_mask[4];
3643         unsigned se;
3644
3645         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3646         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3647         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3648         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3649
3650         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3651         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3652         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3653
3654         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3655                              (!se_mask[2] && !se_mask[3]))) {
3656                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3657
3658                 if (!se_mask[0] && !se_mask[1]) {
3659                         raster_config_1 |=
3660                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3661                 } else {
3662                         raster_config_1 |=
3663                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3664                 }
3665         }
3666
3667         for (se = 0; se < num_se; se++) {
3668                 unsigned raster_config_se = raster_config;
3669                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3670                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3671                 int idx = (se / 2) * 2;
3672
3673                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3674                         raster_config_se &= ~SE_MAP_MASK;
3675
3676                         if (!se_mask[idx]) {
3677                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3678                         } else {
3679                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3680                         }
3681                 }
3682
3683                 pkr0_mask &= rb_mask;
3684                 pkr1_mask &= rb_mask;
3685                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3686                         raster_config_se &= ~PKR_MAP_MASK;
3687
3688                         if (!pkr0_mask) {
3689                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3690                         } else {
3691                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3692                         }
3693                 }
3694
3695                 if (rb_per_se >= 2) {
3696                         unsigned rb0_mask = 1 << (se * rb_per_se);
3697                         unsigned rb1_mask = rb0_mask << 1;
3698
3699                         rb0_mask &= rb_mask;
3700                         rb1_mask &= rb_mask;
3701                         if (!rb0_mask || !rb1_mask) {
3702                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3703
3704                                 if (!rb0_mask) {
3705                                         raster_config_se |=
3706                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3707                                 } else {
3708                                         raster_config_se |=
3709                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3710                                 }
3711                         }
3712
3713                         if (rb_per_se > 2) {
3714                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3715                                 rb1_mask = rb0_mask << 1;
3716                                 rb0_mask &= rb_mask;
3717                                 rb1_mask &= rb_mask;
3718                                 if (!rb0_mask || !rb1_mask) {
3719                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3720
3721                                         if (!rb0_mask) {
3722                                                 raster_config_se |=
3723                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3724                                         } else {
3725                                                 raster_config_se |=
3726                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3727                                         }
3728                                 }
3729                         }
3730                 }
3731
3732                 /* GRBM_GFX_INDEX has a different offset on VI */
3733                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3734                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3735                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3736         }
3737
3738         /* GRBM_GFX_INDEX has a different offset on VI */
3739         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3740 }
3741
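/*
 * Probe the active-RB bitmap for every SE/SH, program the (possibly
 * harvested) raster configuration, and cache the per-SE/SH register
 * values so they can be reported to userspace.
 */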
3742 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3743 {
3744         int i, j;
3745         u32 data;
3746         u32 raster_config = 0, raster_config_1 = 0;
3747         u32 active_rbs = 0;
3748         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3749                                         adev->gfx.config.max_sh_per_se;
3750         unsigned num_rb_pipes;
3751
3752         mutex_lock(&adev->grbm_idx_mutex);
3753         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3754                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3755                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3756                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3757                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3758                                                rb_bitmap_width_per_sh);
3759                 }
3760         }
3761         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3762
3763         adev->gfx.config.backend_enable_mask = active_rbs;
3764         adev->gfx.config.num_rbs = hweight32(active_rbs);
3765
3766         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3767                              adev->gfx.config.max_shader_engines, 16);
3768
3769         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3770
3771         if (!adev->gfx.config.backend_enable_mask ||
3772                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3773                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3774                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3775         } else {
3776                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3777                                                         adev->gfx.config.backend_enable_mask,
3778                                                         num_rb_pipes);
3779         }
3780
3781         /* cache the values for userspace */
3782         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3783                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3784                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3785                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3786                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3787                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3788                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3789                         adev->gfx.config.rb_config[i][j].raster_config =
3790                                 RREG32(mmPA_SC_RASTER_CONFIG);
3791                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3792                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3793                 }
3794         }
3795         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3796         mutex_unlock(&adev->grbm_idx_mutex);
3797 }
3798
3799 /**
3800  * gfx_v8_0_init_compute_vmid - init compute vmids
3801  *
3802  * @adev: amdgpu_device pointer
3803  *
3804  * Initialize the SH_MEM registers used by the compute VMIDs (8-15).
3805  *
3806  */
3807 #define DEFAULT_SH_MEM_BASES    (0x6000)
3808 #define FIRST_COMPUTE_VMID      (8)
3809 #define LAST_COMPUTE_VMID       (16)
3810 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3811 {
3812         int i;
3813         uint32_t sh_mem_config;
3814         uint32_t sh_mem_bases;
3815
3816         /*
3817          * Configure apertures:
3818          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3819          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3820          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3821          */
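        /*
         * The 16-bit SHARED_BASE/PRIVATE_BASE fields select VA bits
         * 63:48 (cf. the shared_aperture_start >> 48 write in
         * gfx_v8_0_gpu_init()), so 0x6000 places both apertures at
         * 0x6000000000000000 as documented above.
         */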
3822         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3823
3824         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3825                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3826                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3827                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3828                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3829                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3830
3831         mutex_lock(&adev->srbm_mutex);
3832         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3833                 vi_srbm_select(adev, 0, 0, 0, i);
3834                 /* CP and shaders */
3835                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
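                /* an empty range (base > limit) leaves APE1 disabled */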
3836                 WREG32(mmSH_MEM_APE1_BASE, 1);
3837                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3838                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3839         }
3840         vi_srbm_select(adev, 0, 0, 0, 0);
3841         mutex_unlock(&adev->srbm_mutex);
3842 }
3843
3844 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3845 {
3846         switch (adev->asic_type) {
3847         default:
3848                 adev->gfx.config.double_offchip_lds_buf = 1;
3849                 break;
3850         case CHIP_CARRIZO:
3851         case CHIP_STONEY:
3852                 adev->gfx.config.double_offchip_lds_buf = 0;
3853                 break;
3854         }
3855 }
3856
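/*
 * One-time setup of the gfx block: address config, tiling tables,
 * RB/raster config, per-VMID SH_MEM apertures and the SC FIFO sizes,
 * all under the appropriate srbm/grbm locks.
 */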
3857 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3858 {
3859         u32 tmp, sh_static_mem_cfg;
3860         int i;
3861
3862         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3863         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3864         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3865         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3866
3867         gfx_v8_0_tiling_mode_table_init(adev);
3868         gfx_v8_0_setup_rb(adev);
3869         gfx_v8_0_get_cu_info(adev);
3870         gfx_v8_0_config_init(adev);
3871
3872         /* XXX SH_MEM regs */
3873         /* where to put LDS, scratch, GPUVM in FSA64 space */
3874         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3875                                    SWIZZLE_ENABLE, 1);
3876         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3877                                    ELEMENT_SIZE, 1);
3878         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3879                                    INDEX_STRIDE, 3);
3880         WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3881
3882         mutex_lock(&adev->srbm_mutex);
3883         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3884                 vi_srbm_select(adev, 0, 0, 0, i);
3885                 /* CP and shaders */
3886                 if (i == 0) {
3887                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3888                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3889                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3890                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3891                         WREG32(mmSH_MEM_CONFIG, tmp);
3892                         WREG32(mmSH_MEM_BASES, 0);
3893                 } else {
3894                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3895                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3896                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3897                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3898                         WREG32(mmSH_MEM_CONFIG, tmp);
3899                         tmp = adev->gmc.shared_aperture_start >> 48;
3900                         WREG32(mmSH_MEM_BASES, tmp);
3901                 }
3902
3903                 WREG32(mmSH_MEM_APE1_BASE, 1);
3904                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3905         }
3906         vi_srbm_select(adev, 0, 0, 0, 0);
3907         mutex_unlock(&adev->srbm_mutex);
3908
3909         gfx_v8_0_init_compute_vmid(adev);
3910
3911         mutex_lock(&adev->grbm_idx_mutex);
3912         /*
3913          * make sure that the following register writes are broadcast
3914          * to all the shaders
3915          */
3916         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3917
3918         WREG32(mmPA_SC_FIFO_SIZE,
3919                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3920                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3921                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3922                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3923                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3924                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3925                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3926                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3927
3928         tmp = RREG32(mmSPI_ARB_PRIORITY);
3929         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3930         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3931         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3932         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3933         WREG32(mmSPI_ARB_PRIORITY, tmp);
3934
3935         mutex_unlock(&adev->grbm_idx_mutex);
3937 }
3938
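/*
 * Busy-wait (up to adev->usec_timeout microseconds per unit) until the
 * RLC serdes masters report idle: first the CU masters on every SE/SH,
 * then the non-CU (SE/GC/TC) masters.
 */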
3939 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3940 {
3941         u32 i, j, k;
3942         u32 mask;
3943
3944         mutex_lock(&adev->grbm_idx_mutex);
3945         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3946                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3947                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3948                         for (k = 0; k < adev->usec_timeout; k++) {
3949                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3950                                         break;
3951                                 udelay(1);
3952                         }
3953                         if (k == adev->usec_timeout) {
3954                                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3955                                                       0xffffffff, 0xffffffff);
3956                                 mutex_unlock(&adev->grbm_idx_mutex);
3957                                 DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
3958                                          i, j);
3959                                 return;
3960                         }
3961                 }
3962         }
3963         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3964         mutex_unlock(&adev->grbm_idx_mutex);
3965
3966         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3967                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3968                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3969                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3970         for (k = 0; k < adev->usec_timeout; k++) {
3971                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3972                         break;
3973                 udelay(1);
3974         }
3975 }
3976
3977 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3978                                                bool enable)
3979 {
3980         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3981
3982         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3983         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3984         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3985         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3986
3987         WREG32(mmCP_INT_CNTL_RING0, tmp);
3988 }
3989
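/*
 * Point the RLC at the clear-state indirect buffer (CSIB).  The low
 * two address bits are masked off since the buffer must be at least
 * dword aligned.
 */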
3990 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3991 {
3992         /* csib */
3993         WREG32(mmRLC_CSIB_ADDR_HI,
3994                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3995         WREG32(mmRLC_CSIB_ADDR_LO,
3996                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3997         WREG32(mmRLC_CSIB_LENGTH,
3998                         adev->gfx.rlc.clear_state_size);
3999 }
4000
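/*
 * Walk the RLC register-list-format blob.  As consumed here, each
 * entry is a run of three-dword records terminated by an 0xFFFFFFFF
 * marker, with the third dword naming an indirect index register; that
 * dword is rewritten in place to an index into unique_indices[], which
 * collects each distinct register once, while ind_start_offsets[]
 * records where every entry begins.
 */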
4001 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
4002                                 int ind_offset,
4003                                 int list_size,
4004                                 int *unique_indices,
4005                                 int *indices_count,
4006                                 int max_indices,
4007                                 int *ind_start_offsets,
4008                                 int *offset_count,
4009                                 int max_offset)
4010 {
4011         int indices;
4012         bool new_entry = true;
4013
4014         for (; ind_offset < list_size; ind_offset++) {
4015
4016                 if (new_entry) {
4017                         new_entry = false;
4018                         ind_start_offsets[*offset_count] = ind_offset;
4019                         *offset_count = *offset_count + 1;
4020                         BUG_ON(*offset_count >= max_offset);
4021                 }
4022
4023                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
4024                         new_entry = true;
4025                         continue;
4026                 }
4027
4028                 ind_offset += 2;
4029
4030                 /* look for a matching index */
4031                 for (indices = 0;
4032                         indices < *indices_count;
4033                         indices++) {
4034                         if (unique_indices[indices] ==
4035                                 register_list_format[ind_offset])
4036                                 break;
4037                 }
4038
4039                 if (indices >= *indices_count) {
4040                         unique_indices[*indices_count] =
4041                                 register_list_format[ind_offset];
4042                         indices = *indices_count;
4043                         *indices_count = *indices_count + 1;
4044                         BUG_ON(*indices_count >= max_indices);
4045                 }
4046
4047                 register_list_format[ind_offset] = indices;
4048         }
4049 }
4050
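/*
 * Feed the RLC save/restore machine: upload the direct register list
 * to ARAM, the (deduplicated) indirect list and entry offsets to GPM
 * scratch, and program each unique indirect register into an
 * RLC_SRM_INDEX_CNTL_ADDR/DATA pair (low 18 bits as the address, the
 * bits above bit 19 as data).
 */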
4051 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4052 {
4053         int i, temp, data;
4054         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4055         int indices_count = 0;
4056         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4057         int offset_count = 0;
4058
4059         int list_size;
4060         unsigned int *register_list_format =
4061                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4062         if (!register_list_format)
4063                 return -ENOMEM;
4064         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4065                         adev->gfx.rlc.reg_list_format_size_bytes);
4066
4067         gfx_v8_0_parse_ind_reg_list(register_list_format,
4068                                 RLC_FormatDirectRegListLength,
4069                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4070                                 unique_indices,
4071                                 &indices_count,
4072                                 ARRAY_SIZE(unique_indices),
4073                                 indirect_start_offsets,
4074                                 &offset_count,
4075                                 ARRAY_SIZE(indirect_start_offsets));
4076
4077         /* save and restore list */
4078         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4079
4080         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4081         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4082                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4083
4084         /* indirect list */
4085         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4086         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4087                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4088
4089         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4090         list_size = list_size >> 1;
4091         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4092         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4093
4094         /* starting offsets */
4095         WREG32(mmRLC_GPM_SCRATCH_ADDR,
4096                 adev->gfx.rlc.starting_offsets_start);
4097         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4098                 WREG32(mmRLC_GPM_SCRATCH_DATA,
4099                                 indirect_start_offsets[i]);
4100
4101         /* unique indices */
4102         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4103         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4104         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4105                 if (unique_indices[i] != 0) {
4106                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4107                         WREG32(data + i, unique_indices[i] >> 20);
4108                 }
4109         }
4110         kfree(register_list_format);
4111
4112         return 0;
4113 }
4114
4115 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4116 {
4117         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4118 }
4119
4120 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4121 {
4122         uint32_t data;
4123
4124         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4125
4126         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4127         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4128         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4129         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4130         WREG32(mmRLC_PG_DELAY, data);
4131
4132         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4133         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4134
4135 }
4136
4137 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4138                                                 bool enable)
4139 {
4140         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4141 }
4142
4143 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4144                                                   bool enable)
4145 {
4146         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4147 }
4148
4149 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4150 {
4151         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4152 }
4153
4154 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4155 {
4156         if ((adev->asic_type == CHIP_CARRIZO) ||
4157             (adev->asic_type == CHIP_STONEY)) {
4158                 gfx_v8_0_init_csb(adev);
4159                 gfx_v8_0_init_save_restore_list(adev);
4160                 gfx_v8_0_enable_save_restore_machine(adev);
4161                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4162                 gfx_v8_0_init_power_gating(adev);
4163                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4164         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4165                    (adev->asic_type == CHIP_POLARIS12) ||
4166                    (adev->asic_type == CHIP_VEGAM)) {
4167                 gfx_v8_0_init_csb(adev);
4168                 gfx_v8_0_init_save_restore_list(adev);
4169                 gfx_v8_0_enable_save_restore_machine(adev);
4170                 gfx_v8_0_init_power_gating(adev);
4171         }
4172
4173 }
4174
4175 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4176 {
4177         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4178
4179         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4180         gfx_v8_0_wait_for_rlc_serdes(adev);
4181 }
4182
4183 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4184 {
4185         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4186         udelay(50);
4187
4188         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4189         udelay(50);
4190 }
4191
4192 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4193 {
4194         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4195
4196         /* on APUs such as Carrizo, the CP interrupt is enabled after the CP is initialized */
4197         if (!(adev->flags & AMD_IS_APU))
4198                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4199
4200         udelay(50);
4201 }
4202
4203 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4204 {
4205         const struct rlc_firmware_header_v2_0 *hdr;
4206         const __le32 *fw_data;
4207         unsigned i, fw_size;
4208
4209         if (!adev->gfx.rlc_fw)
4210                 return -EINVAL;
4211
4212         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4213         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4214
4215         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4216                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4217         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4218
4219         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4220         for (i = 0; i < fw_size; i++)
4221                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4222         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4223
4224         return 0;
4225 }
4226
4227 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4228 {
4229         int r;
4230         u32 tmp;
4231
4232         gfx_v8_0_rlc_stop(adev);
4233
4234         /* disable CG */
4235         tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4236         tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4237                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4238         WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4239         if (adev->asic_type == CHIP_POLARIS11 ||
4240             adev->asic_type == CHIP_POLARIS10 ||
4241             adev->asic_type == CHIP_POLARIS12 ||
4242             adev->asic_type == CHIP_VEGAM) {
4243                 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4244                 tmp &= ~0x3;
4245                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4246         }
4247
4248         /* disable PG */
4249         WREG32(mmRLC_PG_CNTL, 0);
4250
4251         gfx_v8_0_rlc_reset(adev);
4252         gfx_v8_0_init_pg(adev);
4253
4254
4255         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4256                 /* legacy rlc firmware loading */
4257                 r = gfx_v8_0_rlc_load_microcode(adev);
4258                 if (r)
4259                         return r;
4260         }
4261
4262         gfx_v8_0_rlc_start(adev);
4263
4264         return 0;
4265 }
4266
4267 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4268 {
4269         int i;
4270         u32 tmp = RREG32(mmCP_ME_CNTL);
4271
4272         if (enable) {
4273                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4274                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4275                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4276         } else {
4277                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4278                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4279                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4280                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4281                         adev->gfx.gfx_ring[i].ready = false;
4282         }
4283         WREG32(mmCP_ME_CNTL, tmp);
4284         udelay(50);
4285 }
4286
4287 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4288 {
4289         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4290         const struct gfx_firmware_header_v1_0 *ce_hdr;
4291         const struct gfx_firmware_header_v1_0 *me_hdr;
4292         const __le32 *fw_data;
4293         unsigned i, fw_size;
4294
4295         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4296                 return -EINVAL;
4297
4298         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4299                 adev->gfx.pfp_fw->data;
4300         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4301                 adev->gfx.ce_fw->data;
4302         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4303                 adev->gfx.me_fw->data;
4304
4305         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4306         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4307         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4308
4309         gfx_v8_0_cp_gfx_enable(adev, false);
4310
4311         /* PFP */
4312         fw_data = (const __le32 *)
4313                 (adev->gfx.pfp_fw->data +
4314                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4315         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4316         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4317         for (i = 0; i < fw_size; i++)
4318                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4319         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4320
4321         /* CE */
4322         fw_data = (const __le32 *)
4323                 (adev->gfx.ce_fw->data +
4324                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4325         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4326         WREG32(mmCP_CE_UCODE_ADDR, 0);
4327         for (i = 0; i < fw_size; i++)
4328                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4329         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4330
4331         /* ME */
4332         fw_data = (const __le32 *)
4333                 (adev->gfx.me_fw->data +
4334                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4335         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4336         WREG32(mmCP_ME_RAM_WADDR, 0);
4337         for (i = 0; i < fw_size; i++)
4338                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4339         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4340
4341         return 0;
4342 }
4343
4344 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4345 {
4346         u32 count = 0;
4347         const struct cs_section_def *sect = NULL;
4348         const struct cs_extent_def *ext = NULL;
4349
4350         /* begin clear state */
4351         count += 2;
4352         /* context control state */
4353         count += 3;
4354
4355         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4356                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4357                         if (sect->id == SECT_CONTEXT)
4358                                 count += 2 + ext->reg_count;
4359                         else
4360                                 return 0;
4361                 }
4362         }
4363         /* pa_sc_raster_config/pa_sc_raster_config1 */
4364         count += 4;
4365         /* end clear state */
4366         count += 2;
4367         /* clear state */
4368         count += 2;
4369
4370         return count;
4371 }
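/*
 * Example sizing for the counting above, assuming a single
 * SECT_CONTEXT section containing one 10-register extent:
 *
 *   2 (begin clear state) + 3 (context control)
 *   + (2 + 10) (SET_CONTEXT_REG header + payload)
 *   + 4 (raster config pair) + 2 (end clear state) + 2 (clear state)
 *   = 25 dwords
 *
 * gfx_v8_0_cp_gfx_start() below allocates this count plus 4; the
 * extra dwords cover the 4-dword SET_BASE packet it emits for the
 * CE partitions.
 */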
4372
4373 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4374 {
4375         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4376         const struct cs_section_def *sect = NULL;
4377         const struct cs_extent_def *ext = NULL;
4378         int r, i;
4379
4380         /* init the CP */
4381         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4382         WREG32(mmCP_ENDIAN_SWAP, 0);
4383         WREG32(mmCP_DEVICE_ID, 1);
4384
4385         gfx_v8_0_cp_gfx_enable(adev, true);
4386
4387         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4388         if (r) {
4389                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4390                 return r;
4391         }
4392
4393         /* clear state buffer */
4394         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4395         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4396
4397         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4398         amdgpu_ring_write(ring, 0x80000000);
4399         amdgpu_ring_write(ring, 0x80000000);
4400
4401         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4402                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4403                         if (sect->id == SECT_CONTEXT) {
4404                                 amdgpu_ring_write(ring,
4405                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4406                                                ext->reg_count));
4407                                 amdgpu_ring_write(ring,
4408                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4409                                 for (i = 0; i < ext->reg_count; i++)
4410                                         amdgpu_ring_write(ring, ext->extent[i]);
4411                         }
4412                 }
4413         }
4414
4415         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4416         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4417         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4418         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4419
4420         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4421         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4422
4423         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4424         amdgpu_ring_write(ring, 0);
4425
4426         /* init the CE partitions */
4427         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4428         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4429         amdgpu_ring_write(ring, 0x8000);
4430         amdgpu_ring_write(ring, 0x8000);
4431
4432         amdgpu_ring_commit(ring);
4433
4434         return 0;
4435 }
4436 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4437 {
4438         u32 tmp;
4439         /* no gfx doorbells on iceland */
4440         if (adev->asic_type == CHIP_TOPAZ)
4441                 return;
4442
4443         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4444
4445         if (ring->use_doorbell) {
4446                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4447                                 DOORBELL_OFFSET, ring->doorbell_index);
4448                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4449                                                 DOORBELL_HIT, 0);
4450                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4451                                             DOORBELL_EN, 1);
4452         } else {
4453                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4454         }
4455
4456         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4457
4458         if (adev->flags & AMD_IS_APU)
4459                 return;
4460
4461         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4462                                         DOORBELL_RANGE_LOWER,
4463                                         AMDGPU_DOORBELL_GFX_RING0);
4464         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4465
4466         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4467                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4468 }
4469
4470 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4471 {
4472         struct amdgpu_ring *ring;
4473         u32 tmp;
4474         u32 rb_bufsz;
4475         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4476         int r;
4477
4478         /* Set the write pointer delay */
4479         WREG32(mmCP_RB_WPTR_DELAY, 0);
4480
4481         /* set the RB to use vmid 0 */
4482         WREG32(mmCP_RB_VMID, 0);
4483
4484         /* Set ring buffer size */
4485         ring = &adev->gfx.gfx_ring[0];
4486         rb_bufsz = order_base_2(ring->ring_size / 8);
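        /*
         * Example, assuming a 4 KiB ring: 4096 / 8 = 512 and
         * order_base_2(512) = 9, so RB_BUFSZ encodes the ring size
         * as a log2 count of 8-byte units (2^9 * 8 = 4096 bytes).
         */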
4487         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4488         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4489         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4490         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4491 #ifdef __BIG_ENDIAN
4492         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4493 #endif
4494         WREG32(mmCP_RB0_CNTL, tmp);
4495
4496         /* Initialize the ring buffer's read and write pointers */
4497         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4498         ring->wptr = 0;
4499         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4500
4501         /* set the wb address whether it's enabled or not */
4502         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4503         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4504         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4505
4506         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4507         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4508         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4509         mdelay(1);
4510         WREG32(mmCP_RB0_CNTL, tmp);
4511
4512         rb_addr = ring->gpu_addr >> 8;
4513         WREG32(mmCP_RB0_BASE, rb_addr);
4514         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4515
4516         gfx_v8_0_set_cpg_door_bell(adev, ring);
4517         /* start the ring */
4518         amdgpu_ring_clear_ring(ring);
4519         gfx_v8_0_cp_gfx_start(adev);
4520         ring->ready = true;
4521         r = amdgpu_ring_test_ring(ring);
4522         if (r)
4523                 ring->ready = false;
4524
4525         return r;
4526 }
4527
4528 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4529 {
4530         int i;
4531
4532         if (enable) {
4533                 WREG32(mmCP_MEC_CNTL, 0);
4534         } else {
4535                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4536                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4537                         adev->gfx.compute_ring[i].ready = false;
4538                 adev->gfx.kiq.ring.ready = false;
4539         }
4540         udelay(50);
4541 }
4542
4543 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4544 {
4545         const struct gfx_firmware_header_v1_0 *mec_hdr;
4546         const __le32 *fw_data;
4547         unsigned i, fw_size;
4548
4549         if (!adev->gfx.mec_fw)
4550                 return -EINVAL;
4551
4552         gfx_v8_0_cp_compute_enable(adev, false);
4553
4554         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4555         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4556
4557         fw_data = (const __le32 *)
4558                 (adev->gfx.mec_fw->data +
4559                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4560         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4561
4562         /* MEC1 */
4563         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4564         for (i = 0; i < fw_size; i++)
4565                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4566         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4567
4568         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4569         if (adev->gfx.mec2_fw) {
4570                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4571
4572                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4573                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4574
4575                 fw_data = (const __le32 *)
4576                         (adev->gfx.mec2_fw->data +
4577                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4578                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4579
4580                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4581                 for (i = 0; i < fw_size; i++)
4582                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4583                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4584         }
4585
4586         return 0;
4587 }
4588
4589 /* KIQ functions */
4590 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4591 {
4592         uint32_t tmp;
4593         struct amdgpu_device *adev = ring->adev;
4594
4595         /* tell RLC which is KIQ queue */
4596         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4597         tmp &= 0xffffff00;
4598         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4599         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4600         tmp |= 0x80;
4601         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4602 }
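/*
 * Example encoding for the writes above: a KIQ ring with me=1,
 * pipe=0, queue=0 gives (1 << 5) | (0 << 3) | 0 = 0x20 in the low
 * byte of RLC_CP_SCHEDULERS; the second write then ORs in 0x80,
 * which appears to latch/activate the selected queue as the KIQ.
 */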
4603
4604 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4605 {
4606         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4607         uint32_t scratch, tmp = 0;
4608         uint64_t queue_mask = 0;
4609         int r, i;
4610
4611         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4612                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4613                         continue;
4614
4615                 /* This situation may be hit in the future if a new HW
4616                  * generation exposes more than 64 queues. If so, the
4617                  * definition of queue_mask needs updating. */
4618                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4619                         DRM_ERROR("Invalid KCQ index enabled: %d\n", i);
4620                         break;
4621                 }
4622
4623                 queue_mask |= (1ull << i);
4624         }
4625
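        /*
         * Example: if the first eight MEC queues are set in
         * adev->gfx.mec.queue_bitmap, the loop above yields
         * queue_mask = 0xff, which the SET_RESOURCES packet below
         * hands to the KIQ as the set of queues it may schedule.
         */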
4626         r = amdgpu_gfx_scratch_get(adev, &scratch);
4627         if (r) {
4628                 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4629                 return r;
4630         }
4631         WREG32(scratch, 0xCAFEDEAD);
4632
4633         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4634         if (r) {
4635                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4636                 amdgpu_gfx_scratch_free(adev, scratch);
4637                 return r;
4638         }
4639         /* set resources */
4640         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4641         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4642         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4643         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4644         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4645         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4646         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4647         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4648         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4649                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4650                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4651                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4652
4653                 /* map queues */
4654                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4655                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
4656                 amdgpu_ring_write(kiq_ring,
4657                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4658                 amdgpu_ring_write(kiq_ring,
4659                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4660                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4661                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4662                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4663                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4664                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4665                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4666                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4667         }
4668         /* write to scratch for completion */
4669         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4670         amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4671         amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4672         amdgpu_ring_commit(kiq_ring);
4673
4674         for (i = 0; i < adev->usec_timeout; i++) {
4675                 tmp = RREG32(scratch);
4676                 if (tmp == 0xDEADBEEF)
4677                         break;
4678                 DRM_UDELAY(1);
4679         }
4680         if (i >= adev->usec_timeout) {
4681                 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4682                           scratch, tmp);
4683                 r = -EINVAL;
4684         }
4685         amdgpu_gfx_scratch_free(adev, scratch);
4686
4687         return r;
4688 }
4689
4690 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4691 {
4692         int i, r = 0;
4693
4694         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4695                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4696                 for (i = 0; i < adev->usec_timeout; i++) {
4697                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4698                                 break;
4699                         udelay(1);
4700                 }
4701                 if (i == adev->usec_timeout)
4702                         r = -ETIMEDOUT;
4703         }
4704         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4705         WREG32(mmCP_HQD_PQ_RPTR, 0);
4706         WREG32(mmCP_HQD_PQ_WPTR, 0);
4707
4708         return r;
4709 }
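/*
 * Usage sketch, mirroring the soft-reset paths later in this file:
 * the caller selects the target queue via SRBM first, so the
 * mmCP_HQD_* accesses above hit that queue's register aliases.
 *
 *   mutex_lock(&adev->srbm_mutex);
 *   vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 *   gfx_v8_0_deactivate_hqd(adev, 2);  -- DEQUEUE_REQ mode 2
 *   vi_srbm_select(adev, 0, 0, 0, 0);
 *   mutex_unlock(&adev->srbm_mutex);
 */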
4710
4711 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4712 {
4713         struct amdgpu_device *adev = ring->adev;
4714         struct vi_mqd *mqd = ring->mqd_ptr;
4715         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4716         uint32_t tmp;
4717
4718         mqd->header = 0xC0310800;
4719         mqd->compute_pipelinestat_enable = 0x00000001;
4720         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4721         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4722         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4723         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4724         mqd->compute_misc_reserved = 0x00000003;
4725         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4726                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4727         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4728                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4729         eop_base_addr = ring->eop_gpu_addr >> 8;
4730         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4731         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4732
4733         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4734         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4735         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4736                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4737
4738         mqd->cp_hqd_eop_control = tmp;
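        /*
         * Worked example: GFX8_MEC_HPD_SIZE is 2048 bytes, i.e.
         * 2048 / 4 = 512 dwords, so order_base_2(512) - 1 = 8 is
         * written and the hardware decodes 2^(8+1) = 512 dwords,
         * matching the EOP buffer allocation.
         */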
4739
4740         /* enable doorbell? */
4741         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4742                             CP_HQD_PQ_DOORBELL_CONTROL,
4743                             DOORBELL_EN,
4744                             ring->use_doorbell ? 1 : 0);
4745
4746         mqd->cp_hqd_pq_doorbell_control = tmp;
4747
4748         /* set the pointer to the MQD */
4749         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4750         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4751
4752         /* set MQD vmid to 0 */
4753         tmp = RREG32(mmCP_MQD_CONTROL);
4754         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4755         mqd->cp_mqd_control = tmp;
4756
4757         /* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
4758         hqd_gpu_addr = ring->gpu_addr >> 8;
4759         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4760         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4761
4762         /* set up the HQD, this is similar to CP_RB0_CNTL */
4763         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4764         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4765                             (order_base_2(ring->ring_size / 4) - 1));
4766         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4767                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4768 #ifdef __BIG_ENDIAN
4769         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4770 #endif
4771         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4772         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4773         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4774         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4775         mqd->cp_hqd_pq_control = tmp;
4776
4777         /* set the wb address whether it's enabled or not */
4778         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4779         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4780         mqd->cp_hqd_pq_rptr_report_addr_hi =
4781                 upper_32_bits(wb_gpu_addr) & 0xffff;
4782
4783         /* only used if CP_PQ_WPTR_POLL_CNTL.EN is set */
4784         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4785         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4786         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4787
4788         tmp = 0;
4789         /* enable the doorbell if requested */
4790         if (ring->use_doorbell) {
4791                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4792                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4793                                 DOORBELL_OFFSET, ring->doorbell_index);
4794
4795                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4796                                          DOORBELL_EN, 1);
4797                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4798                                          DOORBELL_SOURCE, 0);
4799                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4800                                          DOORBELL_HIT, 0);
4801         }
4802
4803         mqd->cp_hqd_pq_doorbell_control = tmp;
4804
4805         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4806         ring->wptr = 0;
4807         mqd->cp_hqd_pq_wptr = ring->wptr;
4808         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4809
4810         /* set the vmid for the queue */
4811         mqd->cp_hqd_vmid = 0;
4812
4813         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4814         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4815         mqd->cp_hqd_persistent_state = tmp;
4816
4817         /* set MTYPE */
4818         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4819         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4820         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4821         mqd->cp_hqd_ib_control = tmp;
4822
4823         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4824         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4825         mqd->cp_hqd_iq_timer = tmp;
4826
4827         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4828         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4829         mqd->cp_hqd_ctx_save_control = tmp;
4830
4831         /* defaults */
4832         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4833         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4834         mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4835         mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4836         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4837         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4838         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4839         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4840         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4841         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4842         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4843         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4844         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4845         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4846         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4847
4848         /* activate the queue */
4849         mqd->cp_hqd_active = 1;
4850
4851         return 0;
4852 }
4853
4854 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4855                         struct vi_mqd *mqd)
4856 {
4857         uint32_t mqd_reg;
4858         uint32_t *mqd_data;
4859
4860         /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4861         mqd_data = &mqd->cp_mqd_base_addr_lo;
4862
4863         /* disable wptr polling */
4864         WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4865
4866         /* program all HQD registers */
4867         for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4868                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4869
4870         /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4871          * This is safe since EOP RPTR==WPTR for any inactive HQD
4872          * on ASICs that do not support context-save.
4873          * EOP writes/reads can start anywhere in the ring.
4874          */
4875         if (adev->asic_type != CHIP_TONGA) {
4876                 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4877                 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4878                 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4879         }
4880
4881         for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4882                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4883
4884         /* activate the HQD */
4885         for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4886                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4887
4888         return 0;
4889 }
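/*
 * The vi_mqd struct mirrors the HQD register file starting at
 * mmCP_MQD_BASE_ADDR, which is what makes the loops above work:
 * mqd_data[reg - mmCP_MQD_BASE_ADDR] is the saved image of register
 * `reg`. The final loop deliberately ends at mmCP_HQD_ACTIVE, since
 * writing cp_hqd_active = 1 last is what actually starts the queue.
 */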
4890
4891 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4892 {
4893         struct amdgpu_device *adev = ring->adev;
4894         struct vi_mqd *mqd = ring->mqd_ptr;
4895         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4896
4897         gfx_v8_0_kiq_setting(ring);
4898
4899         if (adev->in_gpu_reset) { /* for GPU_RESET case */
4900                 /* reset MQD to a clean state */
4901                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4902                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4903
4904                 /* reset ring buffer */
4905                 ring->wptr = 0;
4906                 amdgpu_ring_clear_ring(ring);
4907                 mutex_lock(&adev->srbm_mutex);
4908                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4909                 gfx_v8_0_mqd_commit(adev, mqd);
4910                 vi_srbm_select(adev, 0, 0, 0, 0);
4911                 mutex_unlock(&adev->srbm_mutex);
4912         } else {
4913                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4914                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4915                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4916                 mutex_lock(&adev->srbm_mutex);
4917                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4918                 gfx_v8_0_mqd_init(ring);
4919                 gfx_v8_0_mqd_commit(adev, mqd);
4920                 vi_srbm_select(adev, 0, 0, 0, 0);
4921                 mutex_unlock(&adev->srbm_mutex);
4922
4923                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4924                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4925         }
4926
4927         return 0;
4928 }
4929
4930 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4931 {
4932         struct amdgpu_device *adev = ring->adev;
4933         struct vi_mqd *mqd = ring->mqd_ptr;
4934         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4935
4936         if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
4937                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4938                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4939                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4940                 mutex_lock(&adev->srbm_mutex);
4941                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4942                 gfx_v8_0_mqd_init(ring);
4943                 vi_srbm_select(adev, 0, 0, 0, 0);
4944                 mutex_unlock(&adev->srbm_mutex);
4945
4946                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4947                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4948         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4949                 /* reset MQD to a clean state */
4950                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4951                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4952                 /* reset ring buffer */
4953                 ring->wptr = 0;
4954                 amdgpu_ring_clear_ring(ring);
4955         } else {
4956                 amdgpu_ring_clear_ring(ring);
4957         }
4958         return 0;
4959 }
4960
4961 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4962 {
4963         if (adev->asic_type > CHIP_TONGA) {
4964                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4965                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4966         }
4967         /* enable doorbells */
4968         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4969 }
4970
4971 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4972 {
4973         struct amdgpu_ring *ring = NULL;
4974         int r = 0, i;
4975
4976         gfx_v8_0_cp_compute_enable(adev, true);
4977
4978         ring = &adev->gfx.kiq.ring;
4979
4980         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4981         if (unlikely(r != 0))
4982                 goto done;
4983
4984         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4985         if (!r) {
4986                 r = gfx_v8_0_kiq_init_queue(ring);
4987                 amdgpu_bo_kunmap(ring->mqd_obj);
4988                 ring->mqd_ptr = NULL;
4989         }
4990         amdgpu_bo_unreserve(ring->mqd_obj);
4991         if (r)
4992                 goto done;
4993
4994         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4995                 ring = &adev->gfx.compute_ring[i];
4996
4997                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4998                 if (unlikely(r != 0))
4999                         goto done;
5000                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
5001                 if (!r) {
5002                         r = gfx_v8_0_kcq_init_queue(ring);
5003                         amdgpu_bo_kunmap(ring->mqd_obj);
5004                         ring->mqd_ptr = NULL;
5005                 }
5006                 amdgpu_bo_unreserve(ring->mqd_obj);
5007                 if (r)
5008                         goto done;
5009         }
5010
5011         gfx_v8_0_set_mec_doorbell_range(adev);
5012
5013         r = gfx_v8_0_kiq_kcq_enable(adev);
5014         if (r)
5015                 goto done;
5016
5017         /* Test KIQ */
5018         ring = &adev->gfx.kiq.ring;
5019         ring->ready = true;
5020         r = amdgpu_ring_test_ring(ring);
5021         if (r) {
5022                 ring->ready = false;
5023                 goto done;
5024         }
5025
5026         /* Test KCQs */
5027         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5028                 ring = &adev->gfx.compute_ring[i];
5029                 ring->ready = true;
5030                 r = amdgpu_ring_test_ring(ring);
5031                 if (r)
5032                         ring->ready = false;
5033         }
5034
5035 done:
5036         return r;
5037 }
5038
5039 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5040 {
5041         int r;
5042
5043         if (!(adev->flags & AMD_IS_APU))
5044                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5045
5046         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
5047                 /* legacy firmware loading */
5048                 r = gfx_v8_0_cp_gfx_load_microcode(adev);
5049                 if (r)
5050                         return r;
5051
5052                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5053                 if (r)
5054                         return r;
5055         }
5056
5057         r = gfx_v8_0_cp_gfx_resume(adev);
5058         if (r)
5059                 return r;
5060
5061         r = gfx_v8_0_kiq_resume(adev);
5062         if (r)
5063                 return r;
5064
5065         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5066
5067         return 0;
5068 }
5069
5070 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5071 {
5072         gfx_v8_0_cp_gfx_enable(adev, enable);
5073         gfx_v8_0_cp_compute_enable(adev, enable);
5074 }
5075
5076 static int gfx_v8_0_hw_init(void *handle)
5077 {
5078         int r;
5079         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5080
5081         gfx_v8_0_init_golden_registers(adev);
5082         gfx_v8_0_gpu_init(adev);
5083
5084         r = gfx_v8_0_rlc_resume(adev);
5085         if (r)
5086                 return r;
5087
5088         r = gfx_v8_0_cp_resume(adev);
5089
5090         return r;
5091 }
5092
5093 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
5094 {
5095         struct amdgpu_device *adev = kiq_ring->adev;
5096         uint32_t scratch, tmp = 0;
5097         int r, i;
5098
5099         r = amdgpu_gfx_scratch_get(adev, &scratch);
5100         if (r) {
5101                 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
5102                 return r;
5103         }
5104         WREG32(scratch, 0xCAFEDEAD);
5105
5106         r = amdgpu_ring_alloc(kiq_ring, 10);
5107         if (r) {
5108                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5109                 amdgpu_gfx_scratch_free(adev, scratch);
5110                 return r;
5111         }
5112
5113         /* unmap queues */
5114         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5115         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5116                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5117                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5118                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5119                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5120         amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5121         amdgpu_ring_write(kiq_ring, 0);
5122         amdgpu_ring_write(kiq_ring, 0);
5123         amdgpu_ring_write(kiq_ring, 0);
5124         /* write to scratch for completion */
5125         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5126         amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5127         amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5128         amdgpu_ring_commit(kiq_ring);
5129
5130         for (i = 0; i < adev->usec_timeout; i++) {
5131                 tmp = RREG32(scratch);
5132                 if (tmp == 0xDEADBEEF)
5133                         break;
5134                 DRM_UDELAY(1);
5135         }
5136         if (i >= adev->usec_timeout) {
5137                 DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
5138                 r = -EINVAL;
5139         }
5140         amdgpu_gfx_scratch_free(adev, scratch);
5141         return r;
5142 }
5143
5144 static int gfx_v8_0_hw_fini(void *handle)
5145 {
5146         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5147         int i;
5148
5149         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5150         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5151
5152         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
5153
5154         amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
5155
5156         /* disable KCQs to avoid the CPC touching memory that is no longer valid */
5157         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5158                 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
5159
5160         if (amdgpu_sriov_vf(adev)) {
5161                 pr_debug("Nothing to do for SRIOV clients.\n");
5162                 return 0;
5163         }
5164         gfx_v8_0_cp_enable(adev, false);
5165         gfx_v8_0_rlc_stop(adev);
5166
5167         amdgpu_device_ip_set_powergating_state(adev,
5168                                                AMD_IP_BLOCK_TYPE_GFX,
5169                                                AMD_PG_STATE_UNGATE);
5170
5171         return 0;
5172 }
5173
5174 static int gfx_v8_0_suspend(void *handle)
5175 {
5176         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5177         adev->gfx.in_suspend = true;
5178         return gfx_v8_0_hw_fini(adev);
5179 }
5180
5181 static int gfx_v8_0_resume(void *handle)
5182 {
5183         int r;
5184         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5185
5186         r = gfx_v8_0_hw_init(adev);
5187         adev->gfx.in_suspend = false;
5188         return r;
5189 }
5190
5191 static bool gfx_v8_0_is_idle(void *handle)
5192 {
5193         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5194
5195         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5196                 return false;
5197         else
5198                 return true;
5199 }
5200
5201 static int gfx_v8_0_wait_for_idle(void *handle)
5202 {
5203         unsigned i;
5204         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5205
5206         for (i = 0; i < adev->usec_timeout; i++) {
5207                 if (gfx_v8_0_is_idle(handle))
5208                         return 0;
5209
5210                 udelay(1);
5211         }
5212         return -ETIMEDOUT;
5213 }
5214
5215 static bool gfx_v8_0_check_soft_reset(void *handle)
5216 {
5217         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5218         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5219         u32 tmp;
5220
5221         /* GRBM_STATUS */
5222         tmp = RREG32(mmGRBM_STATUS);
5223         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5224                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5225                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5226                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5227                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5228                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5229                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5230                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5231                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5232                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5233                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5234                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5235                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5236         }
5237
5238         /* GRBM_STATUS2 */
5239         tmp = RREG32(mmGRBM_STATUS2);
5240         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5241                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5242                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5243
5244         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5245             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5246             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5247                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5248                                                 SOFT_RESET_CPF, 1);
5249                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5250                                                 SOFT_RESET_CPC, 1);
5251                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5252                                                 SOFT_RESET_CPG, 1);
5253                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5254                                                 SOFT_RESET_GRBM, 1);
5255         }
5256
5257         /* SRBM_STATUS */
5258         tmp = RREG32(mmSRBM_STATUS);
5259         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5260                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5261                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5262         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5263                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5264                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5265
5266         if (grbm_soft_reset || srbm_soft_reset) {
5267                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5268                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5269                 return true;
5270         } else {
5271                 adev->gfx.grbm_soft_reset = 0;
5272                 adev->gfx.srbm_soft_reset = 0;
5273                 return false;
5274         }
5275 }
5276
5277 static int gfx_v8_0_pre_soft_reset(void *handle)
5278 {
5279         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5280         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5281
5282         if ((!adev->gfx.grbm_soft_reset) &&
5283             (!adev->gfx.srbm_soft_reset))
5284                 return 0;
5285
5286         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5287         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5288
5289         /* stop the rlc */
5290         gfx_v8_0_rlc_stop(adev);
5291
5292         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5293             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5294                 /* Disable GFX parsing/prefetching */
5295                 gfx_v8_0_cp_gfx_enable(adev, false);
5296
5297         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5298             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5299             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5300             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5301                 int i;
5302
5303                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5304                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5305
5306                         mutex_lock(&adev->srbm_mutex);
5307                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5308                         gfx_v8_0_deactivate_hqd(adev, 2);
5309                         vi_srbm_select(adev, 0, 0, 0, 0);
5310                         mutex_unlock(&adev->srbm_mutex);
5311                 }
5312                 /* Disable MEC parsing/prefetching */
5313                 gfx_v8_0_cp_compute_enable(adev, false);
5314         }
5315
5316         return 0;
5317 }
5318
5319 static int gfx_v8_0_soft_reset(void *handle)
5320 {
5321         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5322         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5323         u32 tmp;
5324
5325         if ((!adev->gfx.grbm_soft_reset) &&
5326             (!adev->gfx.srbm_soft_reset))
5327                 return 0;
5328
5329         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5330         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5331
5332         if (grbm_soft_reset || srbm_soft_reset) {
5333                 tmp = RREG32(mmGMCON_DEBUG);
5334                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5335                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5336                 WREG32(mmGMCON_DEBUG, tmp);
5337                 udelay(50);
5338         }
5339
5340         if (grbm_soft_reset) {
5341                 tmp = RREG32(mmGRBM_SOFT_RESET);
5342                 tmp |= grbm_soft_reset;
5343                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5344                 WREG32(mmGRBM_SOFT_RESET, tmp);
5345                 tmp = RREG32(mmGRBM_SOFT_RESET);
5346
5347                 udelay(50);
5348
5349                 tmp &= ~grbm_soft_reset;
5350                 WREG32(mmGRBM_SOFT_RESET, tmp);
5351                 tmp = RREG32(mmGRBM_SOFT_RESET);
5352         }
5353
5354         if (srbm_soft_reset) {
5355                 tmp = RREG32(mmSRBM_SOFT_RESET);
5356                 tmp |= srbm_soft_reset;
5357                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5358                 WREG32(mmSRBM_SOFT_RESET, tmp);
5359                 tmp = RREG32(mmSRBM_SOFT_RESET);
5360
5361                 udelay(50);
5362
5363                 tmp &= ~srbm_soft_reset;
5364                 WREG32(mmSRBM_SOFT_RESET, tmp);
5365                 tmp = RREG32(mmSRBM_SOFT_RESET);
5366         }
5367
5368         if (grbm_soft_reset || srbm_soft_reset) {
5369                 tmp = RREG32(mmGMCON_DEBUG);
5370                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5371                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5372                 WREG32(mmGMCON_DEBUG, tmp);
5373         }
5374
5375         /* Wait a little for things to settle down */
5376         udelay(50);
5377
5378         return 0;
5379 }
5380
5381 static int gfx_v8_0_post_soft_reset(void *handle)
5382 {
5383         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5384         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5385
5386         if ((!adev->gfx.grbm_soft_reset) &&
5387             (!adev->gfx.srbm_soft_reset))
5388                 return 0;
5389
5390         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5391         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5392
5393         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5394             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5395                 gfx_v8_0_cp_gfx_resume(adev);
5396
5397         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5398             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5399             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5400             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5401                 int i;
5402
5403                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5404                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5405
5406                         mutex_lock(&adev->srbm_mutex);
5407                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5408                         gfx_v8_0_deactivate_hqd(adev, 2);
5409                         vi_srbm_select(adev, 0, 0, 0, 0);
5410                         mutex_unlock(&adev->srbm_mutex);
5411                 }
5412                 gfx_v8_0_kiq_resume(adev);
5413         }
5414         gfx_v8_0_rlc_start(adev);
5415
5416         return 0;
5417 }
5418
5419 /**
5420  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5421  *
5422  * @adev: amdgpu_device pointer
5423  *
5424  * Fetches a GPU clock counter snapshot.
5425  * Returns the 64 bit clock counter snapshot.
5426  */
5427 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5428 {
5429         uint64_t clock;
5430
5431         mutex_lock(&adev->gfx.gpu_clock_mutex);
5432         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5433         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5434                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5435         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5436         return clock;
5437 }
5438
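/*
 * Program the per-VMID GDS, GWS and OA partition registers from the ring.
 * The OA value is a contiguous bitmask of oa_size bits starting at bit
 * oa_base; e.g. oa_base = 4, oa_size = 4 yields
 * (1 << 8) - (1 << 4) = 0xf0.
 */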
5439 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5440                                           uint32_t vmid,
5441                                           uint32_t gds_base, uint32_t gds_size,
5442                                           uint32_t gws_base, uint32_t gws_size,
5443                                           uint32_t oa_base, uint32_t oa_size)
5444 {
5445         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5446         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5447
5448         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5449         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5450
5451         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5452         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5453
5454         /* GDS Base */
5455         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5456         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5457                                 WRITE_DATA_DST_SEL(0)));
5458         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5459         amdgpu_ring_write(ring, 0);
5460         amdgpu_ring_write(ring, gds_base);
5461
5462         /* GDS Size */
5463         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5464         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5465                                 WRITE_DATA_DST_SEL(0)));
5466         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5467         amdgpu_ring_write(ring, 0);
5468         amdgpu_ring_write(ring, gds_size);
5469
5470         /* GWS */
5471         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5472         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5473                                 WRITE_DATA_DST_SEL(0)));
5474         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5475         amdgpu_ring_write(ring, 0);
5476         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5477
5478         /* OA */
5479         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5480         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5481                                 WRITE_DATA_DST_SEL(0)));
5482         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5483         amdgpu_ring_write(ring, 0);
5484         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5485 }
5486
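/*
 * Indirect SQ register access: select the wave, SIMD and register offset
 * through SQ_IND_INDEX, then read the value back via SQ_IND_DATA.
 * FORCE_READ is set, presumably so the read completes regardless of the
 * wave's state.
 */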
5487 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5488 {
5489         WREG32(mmSQ_IND_INDEX,
5490                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5491                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5492                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5493                 (SQ_IND_INDEX__FORCE_READ_MASK));
5494         return RREG32(mmSQ_IND_DATA);
5495 }
5496
5497 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5498                            uint32_t wave, uint32_t thread,
5499                            uint32_t regno, uint32_t num, uint32_t *out)
5500 {
5501         WREG32(mmSQ_IND_INDEX,
5502                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5503                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5504                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5505                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5506                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5507                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5508         while (num--)
5509                 *(out++) = RREG32(mmSQ_IND_DATA);
5510 }
5511
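/*
 * Dump the status registers of one wave for the debugfs interface.  The
 * leading 0 tags this as "type 0" wave data; the field order is part of
 * the de facto ABI consumed by userspace tools such as umr.
 */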
5512 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5513 {
5514         /* type 0 wave data */
5515         dst[(*no_fields)++] = 0;
5516         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5517         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5518         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5519         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5520         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5521         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5522         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5523         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5524         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5525         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5526         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5527         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5528         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5529         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5530         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5531         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5532         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5533         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5534 }
5535
5536 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5537                                      uint32_t wave, uint32_t start,
5538                                      uint32_t size, uint32_t *dst)
5539 {
5540         wave_read_regs(
5541                 adev, simd, wave, 0,
5542                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5543 }
5544
5546 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5547         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5548         .select_se_sh = &gfx_v8_0_select_se_sh,
5549         .read_wave_data = &gfx_v8_0_read_wave_data,
5550         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5551         .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5552 };
5553
5554 static int gfx_v8_0_early_init(void *handle)
5555 {
5556         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5557
5558         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5559         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5560         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5561         gfx_v8_0_set_ring_funcs(adev);
5562         gfx_v8_0_set_irq_funcs(adev);
5563         gfx_v8_0_set_gds_init(adev);
5564         gfx_v8_0_set_rlc_funcs(adev);
5565
5566         return 0;
5567 }
5568
5569 static int gfx_v8_0_late_init(void *handle)
5570 {
5571         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5572         int r;
5573
5574         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5575         if (r)
5576                 return r;
5577
5578         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5579         if (r)
5580                 return r;
5581
5582         /* requires IBs so do in late init after IB pool is initialized */
5583         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5584         if (r)
5585                 return r;
5586
5587         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5588         if (r) {
5589                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5590                 return r;
5591         }
5592
5593         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5594         if (r) {
5595                 DRM_ERROR(
5596                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5597                         r);
5598                 return r;
5599         }
5600
5601         return 0;
5602 }
5603
5604 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5605                                                        bool enable)
5606 {
5607         if (((adev->asic_type == CHIP_POLARIS11) ||
5608             (adev->asic_type == CHIP_POLARIS12) ||
5609             (adev->asic_type == CHIP_VEGAM)) &&
5610             adev->powerplay.pp_funcs->set_powergating_by_smu)
5611                 /* Send msg to SMU via Powerplay */
5612                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5613
5614         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5615 }
5616
5617 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5618                                                         bool enable)
5619 {
5620         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5621 }
5622
5623 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5624                 bool enable)
5625 {
5626         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5627 }
5628
5629 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5630                                           bool enable)
5631 {
5632         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5633 }
5634
5635 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5636                                                 bool enable)
5637 {
5638         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5639
5640         /* Read any GFX register to wake up GFX. */
5641         if (!enable)
5642                 RREG32(mmDB_RENDER_CONTROL);
5643 }
5644
5645 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5646                                           bool enable)
5647 {
5648         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5649                 cz_enable_gfx_cg_power_gating(adev, true);
5650                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5651                         cz_enable_gfx_pipeline_power_gating(adev, true);
5652         } else {
5653                 cz_enable_gfx_cg_power_gating(adev, false);
5654                 cz_enable_gfx_pipeline_power_gating(adev, false);
5655         }
5656 }
5657
5658 static int gfx_v8_0_set_powergating_state(void *handle,
5659                                           enum amd_powergating_state state)
5660 {
5661         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5662         bool enable = (state == AMD_PG_STATE_GATE);
5663
5664         if (amdgpu_sriov_vf(adev))
5665                 return 0;
5666
5667         switch (adev->asic_type) {
5668         case CHIP_CARRIZO:
5669         case CHIP_STONEY:
5670
5671                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5672                         cz_enable_sck_slow_down_on_power_up(adev, true);
5673                         cz_enable_sck_slow_down_on_power_down(adev, true);
5674                 } else {
5675                         cz_enable_sck_slow_down_on_power_up(adev, false);
5676                         cz_enable_sck_slow_down_on_power_down(adev, false);
5677                 }
5678                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5679                         cz_enable_cp_power_gating(adev, true);
5680                 else
5681                         cz_enable_cp_power_gating(adev, false);
5682
5683                 cz_update_gfx_cg_power_gating(adev, enable);
5684
5685                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5686                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5687                 else
5688                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5689
5690                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5691                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5692                 else
5693                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5694                 break;
5695         case CHIP_POLARIS11:
5696         case CHIP_POLARIS12:
5697         case CHIP_VEGAM:
5698                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5699                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5700                 else
5701                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5702
5703                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5704                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5705                 else
5706                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5707
5708                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5709                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5710                 else
5711                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5712                 break;
5713         default:
5714                 break;
5715         }
5716
5717         return 0;
5718 }
5719
5720 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5721 {
5722         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5723         int data;
5724
5725         if (amdgpu_sriov_vf(adev))
5726                 *flags = 0;
5727
5728         /* AMD_CG_SUPPORT_GFX_MGCG */
5729         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5730         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5731                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5732
5733         /* AMD_CG_SUPPORT_GFX_CGCG */
5734         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5735         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5736                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5737
5738         /* AMD_CG_SUPPORT_GFX_CGLS */
5739         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5740                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5741
5742         /* AMD_CG_SUPPORT_GFX_CGTS */
5743         data = RREG32(mmCGTS_SM_CTRL_REG);
5744         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5745                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5746
5747         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5748         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5749                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5750
5751         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5752         data = RREG32(mmRLC_MEM_SLP_CNTL);
5753         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5754                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5755
5756         /* AMD_CG_SUPPORT_GFX_CP_LS */
5757         data = RREG32(mmCP_MEM_SLP_CNTL);
5758         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5759                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5760 }
5761
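/*
 * Broadcast a BPM command over the RLC SERDES: select all SEs/SHs, open
 * both the CU and non-CU master masks, then encode the command and target
 * register in RLC_SERDES_WR_CTRL.  Note that the Stoney path leaves the
 * BPM_DATA/REG_ADDR fields out of the clear mask.
 */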
5762 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5763                                      uint32_t reg_addr, uint32_t cmd)
5764 {
5765         uint32_t data;
5766
5767         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5768
5769         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5770         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5771
5772         data = RREG32(mmRLC_SERDES_WR_CTRL);
5773         if (adev->asic_type == CHIP_STONEY)
5774                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5775                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5776                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5777                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5778                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5779                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5780                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5781                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5782                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5783         else
5784                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5785                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5786                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5787                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5788                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5789                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5790                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5791                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5792                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5793                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5794                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5795         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5796                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5797                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5798                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5799
5800         WREG32(mmRLC_SERDES_WR_CTRL, data);
5801 }
5802
5803 #define MSG_ENTER_RLC_SAFE_MODE     1
5804 #define MSG_EXIT_RLC_SAFE_MODE      0
5805 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5806 #define RLC_GPR_REG2__REQ__SHIFT 0
5807 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5808 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5809
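/*
 * Enter RLC safe mode on Iceland-class parts: request it through
 * RLC_SAFE_MODE (CMD plus message 1), then poll until GPM reports GFX
 * clock and power up and the CMD bit self-clears.  This is only done
 * when CGCG/MGCG is actually enabled in cg_flags.
 */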
5810 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5811 {
5812         u32 data;
5813         unsigned i;
5814
5815         data = RREG32(mmRLC_CNTL);
5816         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5817                 return;
5818
5819         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5820                 data |= RLC_SAFE_MODE__CMD_MASK;
5821                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5822                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5823                 WREG32(mmRLC_SAFE_MODE, data);
5824
5825                 for (i = 0; i < adev->usec_timeout; i++) {
5826                         if ((RREG32(mmRLC_GPM_STAT) &
5827                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5828                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5829                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5830                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5831                                 break;
5832                         udelay(1);
5833                 }
5834
5835                 for (i = 0; i < adev->usec_timeout; i++) {
5836                         if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5837                                 break;
5838                         udelay(1);
5839                 }
5840                 adev->gfx.rlc.in_safe_mode = true;
5841         }
5842 }
5843
5844 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5845 {
5846         u32 data = 0;
5847         unsigned i;
5848
5849         data = RREG32(mmRLC_CNTL);
5850         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5851                 return;
5852
5853         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5854                 if (adev->gfx.rlc.in_safe_mode) {
5855                         data |= RLC_SAFE_MODE__CMD_MASK;
5856                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5857                         WREG32(mmRLC_SAFE_MODE, data);
5858                         adev->gfx.rlc.in_safe_mode = false;
5859                 }
5860         }
5861
5862         for (i = 0; i < adev->usec_timeout; i++) {
5863                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5864                         break;
5865                 udelay(1);
5866         }
5867 }
5868
5869 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5870         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5871         .exit_safe_mode = iceland_exit_rlc_safe_mode
5872 };
5873
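/*
 * MGCG/MGLS enable follows the numbered steps in the comments below:
 * memory light sleep first, then dropping the MGCG overrides (GRBM stays
 * overridden on APUs), then clearing the override via a SERDES BPM
 * command, waiting for the SERDES masters to go idle between steps.
 * Disable runs the mirror-image sequence.
 */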
5874 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5875                                                       bool enable)
5876 {
5877         uint32_t temp, data;
5878
5879         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5880
5881         /* It is disabled by HW by default */
5882         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5883                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5884                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5885                                 /* 1 - RLC memory Light sleep */
5886                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5887
5888                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5889                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5890                 }
5891
5892                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5893                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5894                 if (adev->flags & AMD_IS_APU)
5895                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5896                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5897                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5898                 else
5899                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5900                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5901                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5902                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5903
5904                 if (temp != data)
5905                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5906
5907                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5908                 gfx_v8_0_wait_for_rlc_serdes(adev);
5909
5910                 /* 5 - clear mgcg override */
5911                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5912
5913                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5914                         /* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */
5915                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5916                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5917                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5918                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5919                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5920                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5921                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5922                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5923                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5924                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5925                         if (temp != data)
5926                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5927                 }
5928                 udelay(50);
5929
5930                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5931                 gfx_v8_0_wait_for_rlc_serdes(adev);
5932         } else {
5933                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5934                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5935                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5936                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5937                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5938                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5939                 if (temp != data)
5940                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5941
5942                 /* 2 - disable MGLS in RLC */
5943                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5944                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5945                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5946                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5947                 }
5948
5949                 /* 3 - disable MGLS in CP */
5950                 data = RREG32(mmCP_MEM_SLP_CNTL);
5951                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5952                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5953                         WREG32(mmCP_MEM_SLP_CNTL, data);
5954                 }
5955
5956                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5957                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5958                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5959                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5960                 if (temp != data)
5961                         WREG32(mmCGTS_SM_CTRL_REG, data);
5962
5963                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5964                 gfx_v8_0_wait_for_rlc_serdes(adev);
5965
5966                 /* 6 - set mgcg override */
5967                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5968
5969                 udelay(50);
5970
5971                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5972                 gfx_v8_0_wait_for_rlc_serdes(adev);
5973         }
5974
5975         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5976 }
5977
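/*
 * CGCG/CGLS counterpart of the routine above.  Note the ordering enforced
 * by gfx_v8_0_update_gfx_clock_gating() below: coarse grain gating is
 * enabled after, and disabled before, medium grain gating.
 */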
5978 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5979                                                       bool enable)
5980 {
5981         uint32_t temp, temp1, data, data1;
5982
5983         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5984
5985         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5986
5987         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5988                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5989                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5990                 if (temp1 != data1)
5991                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5992
5993                 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5994                 gfx_v8_0_wait_for_rlc_serdes(adev);
5995
5996                 /* 2 - clear cgcg override */
5997                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5998
5999                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6000                 gfx_v8_0_wait_for_rlc_serdes(adev);
6001
6002                 /* 3 - write cmd to set CGLS */
6003                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
6004
6005                 /* 4 - enable cgcg */
6006                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
6007
6008                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6009                         /* enable cgls */
6010                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6011
6012                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6013                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6014
6015                         if (temp1 != data1)
6016                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6017                 } else {
6018                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6019                 }
6020
6021                 if (temp != data)
6022                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6023
6024                 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
6025                  * Cmp_busy/GFX_Idle interrupts
6026                  */
6027                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6028         } else {
6029                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
6030                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
6031
6032                 /* TEST CGCG */
6033                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6034                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
6035                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
6036                 if (temp1 != data1)
6037                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6038
6039                 /* read gfx register to wake up cgcg */
6040                 RREG32(mmCB_CGTT_SCLK_CTRL);
6041                 RREG32(mmCB_CGTT_SCLK_CTRL);
6042                 RREG32(mmCB_CGTT_SCLK_CTRL);
6043                 RREG32(mmCB_CGTT_SCLK_CTRL);
6044
6045                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6046                 gfx_v8_0_wait_for_rlc_serdes(adev);
6047
6048                 /* write cmd to Set CGCG Override */
6049                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6050
6051                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6052                 gfx_v8_0_wait_for_rlc_serdes(adev);
6053
6054                 /* write cmd to Clear CGLS */
6055                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6056
6057                 /* disable cgcg, cgls should be disabled too. */
6058                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
6059                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6060                 if (temp != data)
6061                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6062                 /* enable interrupts again for PG */
6063                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6064         }
6065
6066         gfx_v8_0_wait_for_rlc_serdes(adev);
6067
6068         adev->gfx.rlc.funcs->exit_safe_mode(adev);
6069 }

6070 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6071                                             bool enable)
6072 {
6073         if (enable) {
6074                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6075                  * ===  MGCG + MGLS + TS(CG/LS) ===
6076                  */
6077                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6078                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6079         } else {
6080                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6081                  * ===  CGCG + CGLS ===
6082                  */
6083                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6084                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6085         }
6086         return 0;
6087 }
6088
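/*
 * On Tonga (and Polaris below) clock gating is not programmed directly;
 * PP_CG_MSG_ID() packs group, block, supported modes and requested state
 * into a message that the SMU applies via set_clockgating_by_smu.
 */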
6089 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6090                                           enum amd_clockgating_state state)
6091 {
6092         uint32_t msg_id, pp_state = 0;
6093         uint32_t pp_support_state = 0;
6094
6095         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6096                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6097                         pp_support_state = PP_STATE_SUPPORT_LS;
6098                         pp_state = PP_STATE_LS;
6099                 }
6100                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6101                         pp_support_state |= PP_STATE_SUPPORT_CG;
6102                         pp_state |= PP_STATE_CG;
6103                 }
6104                 if (state == AMD_CG_STATE_UNGATE)
6105                         pp_state = 0;
6106
6107                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6108                                 PP_BLOCK_GFX_CG,
6109                                 pp_support_state,
6110                                 pp_state);
6111                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6112                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6113         }
6114
6115         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6116                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6117                         pp_support_state = PP_STATE_SUPPORT_LS;
6118                         pp_state = PP_STATE_LS;
6119                 }
6120
6121                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6122                         pp_support_state |= PP_STATE_SUPPORT_CG;
6123                         pp_state |= PP_STATE_CG;
6124                 }
6125
6126                 if (state == AMD_CG_STATE_UNGATE)
6127                         pp_state = 0;
6128
6129                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6130                                 PP_BLOCK_GFX_MG,
6131                                 pp_support_state,
6132                                 pp_state);
6133                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6134                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6135         }
6136
6137         return 0;
6138 }
6139
6140 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6141                                           enum amd_clockgating_state state)
6142 {
6144         uint32_t msg_id, pp_state = 0;
6145         uint32_t pp_support_state = 0;
6146
6147         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6148                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6149                         pp_support_state = PP_STATE_SUPPORT_LS;
6150                         pp_state = PP_STATE_LS;
6151                 }
6152                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6153                         pp_support_state |= PP_STATE_SUPPORT_CG;
6154                         pp_state |= PP_STATE_CG;
6155                 }
6156                 if (state == AMD_CG_STATE_UNGATE)
6157                         pp_state = 0;
6158
6159                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6160                                 PP_BLOCK_GFX_CG,
6161                                 pp_support_state,
6162                                 pp_state);
6163                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6164                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6165         }
6166
6167         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6168                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6169                         pp_support_state = PP_STATE_SUPPORT_LS;
6170                         pp_state = PP_STATE_LS;
6171                 }
6172                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6173                         pp_support_state |= PP_STATE_SUPPORT_CG;
6174                         pp_state |= PP_STATE_CG;
6175                 }
6176                 if (state == AMD_CG_STATE_UNGATE)
6177                         pp_state = 0;
6178
6179                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6180                                 PP_BLOCK_GFX_3D,
6181                                 pp_support_state,
6182                                 pp_state);
6183                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6184                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6185         }
6186
6187         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6188                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6189                         pp_support_state = PP_STATE_SUPPORT_LS;
6190                         pp_state = PP_STATE_LS;
6191                 }
6192
6193                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6194                         pp_support_state |= PP_STATE_SUPPORT_CG;
6195                         pp_state |= PP_STATE_CG;
6196                 }
6197
6198                 if (state == AMD_CG_STATE_UNGATE)
6199                         pp_state = 0;
6200
6201                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6202                                 PP_BLOCK_GFX_MG,
6203                                 pp_support_state,
6204                                 pp_state);
6205                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6206                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6207         }
6208
6209         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6210                 pp_support_state = PP_STATE_SUPPORT_LS;
6211
6212                 if (state == AMD_CG_STATE_UNGATE)
6213                         pp_state = 0;
6214                 else
6215                         pp_state = PP_STATE_LS;
6216
6217                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6218                                 PP_BLOCK_GFX_RLC,
6219                                 pp_support_state,
6220                                 pp_state);
6221                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6222                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6223         }
6224
6225         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6226                 pp_support_state = PP_STATE_SUPPORT_LS;
6227
6228                 if (state == AMD_CG_STATE_UNGATE)
6229                         pp_state = 0;
6230                 else
6231                         pp_state = PP_STATE_LS;
6232                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6233                         PP_BLOCK_GFX_CP,
6234                         pp_support_state,
6235                         pp_state);
6236                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6237                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6238         }
6239
6240         return 0;
6241 }
6242
6243 static int gfx_v8_0_set_clockgating_state(void *handle,
6244                                           enum amd_clockgating_state state)
6245 {
6246         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6247
6248         if (amdgpu_sriov_vf(adev))
6249                 return 0;
6250
6251         switch (adev->asic_type) {
6252         case CHIP_FIJI:
6253         case CHIP_CARRIZO:
6254         case CHIP_STONEY:
6255                 gfx_v8_0_update_gfx_clock_gating(adev,
6256                                                  state == AMD_CG_STATE_GATE);
6257                 break;
6258         case CHIP_TONGA:
6259                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6260                 break;
6261         case CHIP_POLARIS10:
6262         case CHIP_POLARIS11:
6263         case CHIP_POLARIS12:
6264         case CHIP_VEGAM:
6265                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6266                 break;
6267         default:
6268                 break;
6269         }
6270         return 0;
6271 }
6272
6273 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6274 {
6275         return ring->adev->wb.wb[ring->rptr_offs];
6276 }
6277
6278 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6279 {
6280         struct amdgpu_device *adev = ring->adev;
6281
6282         if (ring->use_doorbell)
6283                 /* XXX check if swapping is necessary on BE */
6284                 return ring->adev->wb.wb[ring->wptr_offs];
6285         else
6286                 return RREG32(mmCP_RB0_WPTR);
6287 }
6288
6289 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6290 {
6291         struct amdgpu_device *adev = ring->adev;
6292
6293         if (ring->use_doorbell) {
6294                 /* XXX check if swapping is necessary on BE */
6295                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6296                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6297         } else {
6298                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6299                 (void)RREG32(mmCP_RB0_WPTR);
6300         }
6301 }
6302
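/*
 * Emit an HDP flush as a WAIT_REG_MEM packet in write-wait-write mode:
 * the CP writes the per-queue mask to GPU_HDP_FLUSH_REQ and waits for the
 * same bits in GPU_HDP_FLUSH_DONE.  Compute/KIQ rings derive their mask
 * from the ME/pipe they run on; gfx uses the CP0 bit on the PFP engine.
 */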
6303 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6304 {
6305         u32 ref_and_mask, reg_mem_engine;
6306
6307         if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6308             (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6309                 switch (ring->me) {
6310                 case 1:
6311                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6312                         break;
6313                 case 2:
6314                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6315                         break;
6316                 default:
6317                         return;
6318                 }
6319                 reg_mem_engine = 0;
6320         } else {
6321                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6322                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6323         }
6324
6325         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6326         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6327                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6328                                  reg_mem_engine));
6329         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6330         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6331         amdgpu_ring_write(ring, ref_and_mask);
6332         amdgpu_ring_write(ring, ref_and_mask);
6333         amdgpu_ring_write(ring, 0x20); /* poll interval */
6334 }
6335
6336 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6337 {
6338         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6339         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6340                 EVENT_INDEX(4));
6341
6342         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6343         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6344                 EVENT_INDEX(0));
6345 }
6346
6347 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6348                                       struct amdgpu_ib *ib,
6349                                       unsigned vmid, bool ctx_switch)
6350 {
6351         u32 header, control = 0;
6352
6353         if (ib->flags & AMDGPU_IB_FLAG_CE)
6354                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6355         else
6356                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6357
6358         control |= ib->length_dw | (vmid << 24);
6359
6360         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6361                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6362
6363                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6364                         gfx_v8_0_ring_emit_de_meta(ring);
6365         }
6366
6367         amdgpu_ring_write(ring, header);
6368         amdgpu_ring_write(ring,
6369 #ifdef __BIG_ENDIAN
6370                           (2 << 0) |
6371 #endif
6372                           (ib->gpu_addr & 0xFFFFFFFC));
6373         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6374         amdgpu_ring_write(ring, control);
6375 }
6376
6377 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6378                                           struct amdgpu_ib *ib,
6379                                           unsigned vmid, bool ctx_switch)
6380 {
6381         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6382
6383         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6384         amdgpu_ring_write(ring,
6385 #ifdef __BIG_ENDIAN
6386                                 (2 << 0) |
6387 #endif
6388                                 (ib->gpu_addr & 0xFFFFFFFC));
6389         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6390         amdgpu_ring_write(ring, control);
6391 }
6392
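/*
 * Gfx fences use EVENT_WRITE_EOP: flush the TC/TCL1 caches and write the
 * sequence number (32 or 64 bit, per AMDGPU_FENCE_FLAG_64BIT) at end of
 * pipe, optionally raising an interrupt for AMDGPU_FENCE_FLAG_INT.
 */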
6393 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6394                                          u64 seq, unsigned flags)
6395 {
6396         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6397         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6398
6399         /* EVENT_WRITE_EOP - flush caches, send int */
6400         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6401         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6402                                  EOP_TC_ACTION_EN |
6403                                  EOP_TC_WB_ACTION_EN |
6404                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6405                                  EVENT_INDEX(5)));
6406         amdgpu_ring_write(ring, addr & 0xfffffffc);
6407         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6408                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6409         amdgpu_ring_write(ring, lower_32_bits(seq));
6410         amdgpu_ring_write(ring, upper_32_bits(seq));
6412 }
6413
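/*
 * Pipeline sync: stall the PFP (gfx) or ME (compute) with a WAIT_REG_MEM
 * on the fence address until the last synced sequence number lands.
 */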
6414 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6415 {
6416         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6417         uint32_t seq = ring->fence_drv.sync_seq;
6418         uint64_t addr = ring->fence_drv.gpu_addr;
6419
6420         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6421         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6422                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6423                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6424         amdgpu_ring_write(ring, addr & 0xfffffffc);
6425         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6426         amdgpu_ring_write(ring, seq);
6427         amdgpu_ring_write(ring, 0xffffffff);
6428         amdgpu_ring_write(ring, 4); /* poll interval */
6429 }
6430
6431 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6432                                         unsigned vmid, uint64_t pd_addr)
6433 {
6434         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6435
6436         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6437
6438         /* wait for the invalidate to complete */
6439         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6440         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6441                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6442                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6443         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6444         amdgpu_ring_write(ring, 0);
6445         amdgpu_ring_write(ring, 0); /* ref */
6446         amdgpu_ring_write(ring, 0); /* mask */
6447         amdgpu_ring_write(ring, 0x20); /* poll interval */
6448
6449         /* compute doesn't have PFP */
6450         if (usepfp) {
6451                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6452                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6453                 amdgpu_ring_write(ring, 0x0);
6454         }
6455 }
6456
6457 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6458 {
6459         return ring->adev->wb.wb[ring->wptr_offs];
6460 }
6461
6462 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6463 {
6464         struct amdgpu_device *adev = ring->adev;
6465
6466         /* XXX check if swapping is necessary on BE */
6467         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6468         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6469 }
6470
6471 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6472                                            bool acquire)
6473 {
6474         struct amdgpu_device *adev = ring->adev;
6475         int pipe_num, tmp, reg;
6476         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6477
6478         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6479
6480         /* first me only has 2 entries, GFX and HP3D */
6481         if (ring->me > 0)
6482                 pipe_num -= 2;
6483
6484         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6485         tmp = RREG32(reg);
6486         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6487         WREG32(reg, tmp);
6488 }
6489
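/*
 * High-priority pipe reservation: while any pipe holds a reservation,
 * every gfx/compute pipe without one is throttled to a minimal wave
 * launch budget (SPI_WCL_PIPE_PERCENT VALUE = 0x1); once the reservation
 * bitmap empties, all pipes return to the full budget.
 */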
6490 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6491                                             struct amdgpu_ring *ring,
6492                                             bool acquire)
6493 {
6494         int i, pipe;
6495         bool reserve;
6496         struct amdgpu_ring *iring;
6497
6498         mutex_lock(&adev->gfx.pipe_reserve_mutex);
6499         pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6500         if (acquire)
6501                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6502         else
6503                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6504
6505         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6506                 /* Clear all reservations - everyone reacquires all resources */
6507                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6508                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6509                                                        true);
6510
6511                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6512                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6513                                                        true);
6514         } else {
6515                 /* Lower all pipes without a current reservation */
6516                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6517                         iring = &adev->gfx.gfx_ring[i];
6518                         pipe = amdgpu_gfx_queue_to_bit(adev,
6519                                                        iring->me,
6520                                                        iring->pipe,
6521                                                        0);
6522                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6523                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6524                 }
6525
6526                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6527                         iring = &adev->gfx.compute_ring[i];
6528                         pipe = amdgpu_gfx_queue_to_bit(adev,
6529                                                        iring->me,
6530                                                        iring->pipe,
6531                                                        0);
6532                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6533                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6534                 }
6535         }
6536
6537         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6538 }
6539
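/*
 * Raise or restore the HQD pipe/queue priority of one compute ring.  The
 * CP_HQD_* registers are banked per queue, hence the SRBM select around
 * the writes.
 */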
6540 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6541                                       struct amdgpu_ring *ring,
6542                                       bool acquire)
6543 {
6544         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6545         uint32_t queue_priority = acquire ? 0xf : 0x0;
6546
6547         mutex_lock(&adev->srbm_mutex);
6548         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6549
6550         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6551         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6552
6553         vi_srbm_select(adev, 0, 0, 0, 0);
6554         mutex_unlock(&adev->srbm_mutex);
6555 }

6556 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6557                                                enum drm_sched_priority priority)
6558 {
6559         struct amdgpu_device *adev = ring->adev;
6560         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6561
6562         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6563                 return;
6564
6565         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6566         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6567 }
6568
6569 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6570                                              u64 addr, u64 seq,
6571                                              unsigned flags)
6572 {
6573         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6574         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6575
6576         /* RELEASE_MEM - flush caches, send int */
6577         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6578         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6579                                  EOP_TC_ACTION_EN |
6580                                  EOP_TC_WB_ACTION_EN |
6581                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6582                                  EVENT_INDEX(5)));
6583         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6584         amdgpu_ring_write(ring, addr & 0xfffffffc);
6585         amdgpu_ring_write(ring, upper_32_bits(addr));
6586         amdgpu_ring_write(ring, lower_32_bits(seq));
6587         amdgpu_ring_write(ring, upper_32_bits(seq));
6588 }
6589
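/*
 * KIQ fences are plain WRITE_DATA packets: the (32 bit only) sequence
 * number goes straight to memory, and the optional interrupt is raised
 * by writing 0x20000000 to CPC_INT_STATUS rather than via an EOP event.
 */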
6590 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6591                                          u64 seq, unsigned int flags)
6592 {
6593         /* we only allocate 32bit for each seq wb address */
6594         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6595
6596         /* write fence seq to the "addr" */
6597         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6598         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6599                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6600         amdgpu_ring_write(ring, lower_32_bits(addr));
6601         amdgpu_ring_write(ring, upper_32_bits(addr));
6602         amdgpu_ring_write(ring, lower_32_bits(seq));
6603
6604         if (flags & AMDGPU_FENCE_FLAG_INT) {
6605                 /* set register to trigger INT */
6606                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6607                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6608                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6609                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6610                 amdgpu_ring_write(ring, 0);
6611                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6612         }
6613 }
6614
6615 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6616 {
6617         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6618         amdgpu_ring_write(ring, 0);
6619 }
6620
6621 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6622 {
6623         uint32_t dw2 = 0;
6624
6625         if (amdgpu_sriov_vf(ring->adev))
6626                 gfx_v8_0_ring_emit_ce_meta(ring);
6627
6628         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
6629         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6630                 gfx_v8_0_ring_emit_vgt_flush(ring);
6631                 /* set load_global_config & load_global_uconfig */
6632                 dw2 |= 0x8001;
6633                 /* set load_cs_sh_regs */
6634                 dw2 |= 0x01000000;
6635                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6636                 dw2 |= 0x10002;
6637
6638                 /* set load_ce_ram if preamble is presented */
6639                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6640                         dw2 |= 0x10000000;
6641         } else {
6642                 /* still load_ce_ram if this is the first time the preamble
6643                  * is presented, even though no context switch happens.
6644                  */
6645                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6646                         dw2 |= 0x10000000;
6647         }
6648
6649         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6650         amdgpu_ring_write(ring, dw2);
6651         amdgpu_ring_write(ring, 0);
6652 }
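
/*
 * Worked example (annotation): with both AMDGPU_HAVE_CTX_SWITCH and
 * AMDGPU_PREAMBLE_IB_PRESENT set, the bits above combine to
 * dw2 = 0x80000000 | 0x8001 | 0x01000000 | 0x10002 | 0x10000000
 *     = 0x91018003.
 */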
6653
6654 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6655 {
6656         unsigned ret;
6657
6658         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6659         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6660         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6661         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
6662         ret = ring->wptr & ring->buf_mask;
6663         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6664         return ret;
6665 }
6666
6667 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6668 {
6669         unsigned cur;
6670
6671         BUG_ON(offset > ring->buf_mask);
6672         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6673
6674         cur = (ring->wptr & ring->buf_mask) - 1;
6675         if (likely(cur > offset))
6676                 ring->ring[offset] = cur - offset;
6677         else
6678                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6679 }
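
/*
 * Worked example (annotation): with a 4KB ring (1024 dwords,
 * buf_mask 0x3ff), a COND_EXEC patched at offset 0x3fe and wptr now
 * at 4, cur = 3 is not greater than offset, so the discard count
 * wraps to 1024 - 0x3fe + 3 = 5 dwords.
 */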
6680
6681 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6682 {
6683         struct amdgpu_device *adev = ring->adev;
6684
6685         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6686         amdgpu_ring_write(ring, 0 |     /* src: register */
6687                                 (5 << 8) |      /* dst: memory */
6688                                 (1 << 20));     /* write confirm */
6689         amdgpu_ring_write(ring, reg);
6690         amdgpu_ring_write(ring, 0);
6691         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6692                                 adev->virt.reg_val_offs * 4));
6693         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6694                                 adev->virt.reg_val_offs * 4));
6695 }
6696
6697 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6698                                   uint32_t val)
6699 {
6700         uint32_t cmd;
6701
6702         switch (ring->funcs->type) {
6703         case AMDGPU_RING_TYPE_GFX:
6704                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6705                 break;
6706         case AMDGPU_RING_TYPE_KIQ:
6707                 cmd = 1 << 16; /* no inc addr */
6708                 break;
6709         default:
6710                 cmd = WR_CONFIRM;
6711                 break;
6712         }
6713
6714         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6715         amdgpu_ring_write(ring, cmd);
6716         amdgpu_ring_write(ring, reg);
6717         amdgpu_ring_write(ring, 0);
6718         amdgpu_ring_write(ring, val);
6719 }
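
/*
 * Usage sketch (hypothetical helper, not part of the original driver):
 * gfx_v8_0_ring_emit_wreg() emits a 5-dword WRITE_DATA packet, so the
 * caller is assumed to have reserved ring space beforehand, e.g. via
 * amdgpu_ring_alloc().
 */
static inline void gfx_v8_0_example_clear_scratch(struct amdgpu_ring *ring)
{
	/* clear SCRATCH_REG0 from the ring's own engine */
	gfx_v8_0_ring_emit_wreg(ring, mmSCRATCH_REG0, 0);
}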
6720
6721 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6722                                                  enum amdgpu_interrupt_state state)
6723 {
6724         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6725                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6726 }
6727
6728 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6729                                                      int me, int pipe,
6730                                                      enum amdgpu_interrupt_state state)
6731 {
6732         u32 mec_int_cntl, mec_int_cntl_reg;
6733
6734         /*
6735          * amdgpu controls only the first MEC. That's why this function only
6736          * handles the setting of interrupts for this specific MEC. All other
6737          * pipes' interrupts are set by amdkfd.
6738          */
6739
6740         if (me == 1) {
6741                 switch (pipe) {
6742                 case 0:
6743                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6744                         break;
6745                 case 1:
6746                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6747                         break;
6748                 case 2:
6749                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6750                         break;
6751                 case 3:
6752                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6753                         break;
6754                 default:
6755                         DRM_DEBUG("invalid pipe %d\n", pipe);
6756                         return;
6757                 }
6758         } else {
6759                 DRM_DEBUG("invalid me %d\n", me);
6760                 return;
6761         }
6762
6763         switch (state) {
6764         case AMDGPU_IRQ_STATE_DISABLE:
6765                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6766                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6767                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6768                 break;
6769         case AMDGPU_IRQ_STATE_ENABLE:
6770                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6771                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6772                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6773                 break;
6774         default:
6775                 break;
6776         }
6777 }
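
/*
 * Annotation (assumption based on the code above): the
 * TIME_STAMP_INT_ENABLE mask from CP_INT_CNTL_RING0 is reused for the
 * per-pipe CP_ME1_PIPEx_INT_CNTL registers, which appear to share the
 * same bit layout.
 */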
6778
6779 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6780                                              struct amdgpu_irq_src *source,
6781                                              unsigned type,
6782                                              enum amdgpu_interrupt_state state)
6783 {
6784         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6785                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6786
6787         return 0;
6788 }
6789
6790 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6791                                               struct amdgpu_irq_src *source,
6792                                               unsigned type,
6793                                               enum amdgpu_interrupt_state state)
6794 {
6795         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6796                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6797
6798         return 0;
6799 }
6800
6801 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6802                                             struct amdgpu_irq_src *src,
6803                                             unsigned type,
6804                                             enum amdgpu_interrupt_state state)
6805 {
6806         switch (type) {
6807         case AMDGPU_CP_IRQ_GFX_EOP:
6808                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6809                 break;
6810         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6811                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6812                 break;
6813         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6814                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6815                 break;
6816         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6817                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6818                 break;
6819         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6820                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6821                 break;
6822         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6823                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6824                 break;
6825         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6826                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6827                 break;
6828         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6829                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6830                 break;
6831         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6832                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6833                 break;
6834         default:
6835                 break;
6836         }
6837         return 0;
6838 }
6839
6840 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6841                                          struct amdgpu_irq_src *source,
6842                                          unsigned int type,
6843                                          enum amdgpu_interrupt_state state)
6844 {
6845         int enable_flag;
6846
6847         switch (state) {
6848         case AMDGPU_IRQ_STATE_DISABLE:
6849                 enable_flag = 0;
6850                 break;
6851
6852         case AMDGPU_IRQ_STATE_ENABLE:
6853                 enable_flag = 1;
6854                 break;
6855
6856         default:
6857                 return -EINVAL;
6858         }
6859
6860         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6861         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6862         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6863         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6864         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6865         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6866                      enable_flag);
6867         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6868                      enable_flag);
6869         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6870                      enable_flag);
6871         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6872                      enable_flag);
6873         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6874                      enable_flag);
6875         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6876                      enable_flag);
6877         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6878                      enable_flag);
6879         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6880                      enable_flag);
6881
6882         return 0;
6883 }
6884
6885 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6886                                      struct amdgpu_irq_src *source,
6887                                      unsigned int type,
6888                                      enum amdgpu_interrupt_state state)
6889 {
6890         int enable_flag;
6891
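        /*
         * Annotation: the flag is intentionally inverted relative to
         * the requested IRQ state -- SQ_INTERRUPT_MSG_CTRL.STALL set
         * to 1 stalls SQ interrupt message delivery, which is what
         * disables the source.
         */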
6892         switch (state) {
6893         case AMDGPU_IRQ_STATE_DISABLE:
6894                 enable_flag = 1;
6895                 break;
6896
6897         case AMDGPU_IRQ_STATE_ENABLE:
6898                 enable_flag = 0;
6899                 break;
6900
6901         default:
6902                 return -EINVAL;
6903         }
6904
6905         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6906                      enable_flag);
6907
6908         return 0;
6909 }
6910
6911 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6912                             struct amdgpu_irq_src *source,
6913                             struct amdgpu_iv_entry *entry)
6914 {
6915         int i;
6916         u8 me_id, pipe_id, queue_id;
6917         struct amdgpu_ring *ring;
6918
6919         DRM_DEBUG("IH: CP EOP\n");
6920         me_id = (entry->ring_id & 0x0c) >> 2;
6921         pipe_id = (entry->ring_id & 0x03) >> 0;
6922         queue_id = (entry->ring_id & 0x70) >> 4;
6923
6924         switch (me_id) {
6925         case 0:
6926                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6927                 break;
6928         case 1:
6929         case 2:
6930                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6931                         ring = &adev->gfx.compute_ring[i];
6932                         /* Per-queue interrupt is supported for MEC starting from VI,
6933                          * but it can only be enabled/disabled per pipe, not per queue.
6934                          */
6935                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6936                                 amdgpu_fence_process(ring);
6937                 }
6938                 break;
6939         }
6940         return 0;
6941 }
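
/*
 * Worked example (annotation): ring_id 0x35 decodes to me_id 1
 * ((0x35 & 0x0c) >> 2), pipe_id 1 (0x35 & 0x03) and queue_id 3
 * ((0x35 & 0x70) >> 4), i.e. MEC1 pipe 1 queue 3.
 */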
6942
6943 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6944                                  struct amdgpu_irq_src *source,
6945                                  struct amdgpu_iv_entry *entry)
6946 {
6947         DRM_ERROR("Illegal register access in command stream\n");
6948         schedule_work(&adev->reset_work);
6949         return 0;
6950 }
6951
6952 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6953                                   struct amdgpu_irq_src *source,
6954                                   struct amdgpu_iv_entry *entry)
6955 {
6956         DRM_ERROR("Illegal instruction in command stream\n");
6957         schedule_work(&adev->reset_work);
6958         return 0;
6959 }
6960
6961 static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6962                                      struct amdgpu_irq_src *source,
6963                                      struct amdgpu_iv_entry *entry)
6964 {
6965         DRM_ERROR("CP EDC/ECC error detected.\n");
6966         return 0;
6967 }
6968
6969 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6970 {
6971         u32 enc, se_id, sh_id, cu_id;
6972         char type[20];
6973         int sq_edc_source = -1;
6974
6975         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6976         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6977
6978         switch (enc) {
6979                 case 0:
6980                         DRM_INFO("SQ general purpose intr detected: "
6981                                         "se_id %d, immed_overflow %d, host_reg_overflow %d, "
6982                                         "host_cmd_overflow %d, cmd_timestamp %d, "
6983                                         "reg_timestamp %d, thread_trace_buff_full %d, "
6984                                         "wlt %d, thread_trace %d.\n",
6985                                         se_id,
6986                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6987                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6988                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6989                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6990                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6991                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6992                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6993                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6994                                         );
6995                         break;
6996                 case 1:
6997                 case 2:
6998
6999                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
7000                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
7001
7002                         /*
7003                          * This function can be called either directly from the
7004                          * ISR or from the BH, in which case we can safely access
7005                          * the SQ_EDC_INFO instance.
7006                          */
7007                         if (in_task()) {
7008                                 mutex_lock(&adev->grbm_idx_mutex);
7009                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
7010
7011                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
7012
7013                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7014                                 mutex_unlock(&adev->grbm_idx_mutex);
7015                         }
7016
7017                         if (enc == 1)
7018                                 sprintf(type, "instruction intr");
7019                         else
7020                                 sprintf(type, "EDC/ECC error");
7021
7022                         DRM_INFO(
7023                                 "SQ %s detected: "
7024                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
7025                                         "trap %s, sq_ed_info.source %s.\n",
7026                                         type, se_id, sh_id, cu_id,
7027                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
7028                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
7029                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
7030                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
7031                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
7032                                 );
7033                         break;
7034                 default:
7035                         DRM_ERROR("SQ invalid encoding type.\n");
7036         }
7037 }
7038
7039 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
7040 {
7042         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
7043         struct sq_work *sq_work = container_of(work, struct sq_work, work);
7044
7045         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
7046 }
7047
7048 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
7049                            struct amdgpu_irq_src *source,
7050                            struct amdgpu_iv_entry *entry)
7051 {
7052         unsigned ih_data = entry->src_data[0];
7053
7054         /*
7055          * Try to submit work so SQ_EDC_INFO can be accessed from
7056          * BH. If previous work submission hasn't finished yet
7057          * just print whatever info is possible directly from the ISR.
7058          */
7059         if (work_pending(&adev->gfx.sq_work.work)) {
7060                 gfx_v8_0_parse_sq_irq(adev, ih_data);
7061         } else {
7062                 adev->gfx.sq_work.ih_data = ih_data;
7063                 schedule_work(&adev->gfx.sq_work.work);
7064         }
7065
7066         return 0;
7067 }
7068
7069 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
7070                                             struct amdgpu_irq_src *src,
7071                                             unsigned int type,
7072                                             enum amdgpu_interrupt_state state)
7073 {
7074         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7075
7076         switch (type) {
7077         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
7078                 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
7079                              state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7080                 if (ring->me == 1)
7081                         WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
7082                                      ring->pipe,
7083                                      GENERIC2_INT_ENABLE,
7084                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7085                 else
7086                         WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
7087                                      ring->pipe,
7088                                      GENERIC2_INT_ENABLE,
7089                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7090                 break;
7091         default:
7092                 BUG(); /* the KIQ only supports GENERIC2_INT for now */
7093                 break;
7094         }
7095         return 0;
7096 }
7097
7098 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7099                             struct amdgpu_irq_src *source,
7100                             struct amdgpu_iv_entry *entry)
7101 {
7102         u8 me_id, pipe_id, queue_id;
7103         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7104
7105         me_id = (entry->ring_id & 0x0c) >> 2;
7106         pipe_id = (entry->ring_id & 0x03) >> 0;
7107         queue_id = (entry->ring_id & 0x70) >> 4;
7108         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7109                    me_id, pipe_id, queue_id);
7110
7111         amdgpu_fence_process(ring);
7112         return 0;
7113 }
7114
7115 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
7116         .name = "gfx_v8_0",
7117         .early_init = gfx_v8_0_early_init,
7118         .late_init = gfx_v8_0_late_init,
7119         .sw_init = gfx_v8_0_sw_init,
7120         .sw_fini = gfx_v8_0_sw_fini,
7121         .hw_init = gfx_v8_0_hw_init,
7122         .hw_fini = gfx_v8_0_hw_fini,
7123         .suspend = gfx_v8_0_suspend,
7124         .resume = gfx_v8_0_resume,
7125         .is_idle = gfx_v8_0_is_idle,
7126         .wait_for_idle = gfx_v8_0_wait_for_idle,
7127         .check_soft_reset = gfx_v8_0_check_soft_reset,
7128         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
7129         .soft_reset = gfx_v8_0_soft_reset,
7130         .post_soft_reset = gfx_v8_0_post_soft_reset,
7131         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
7132         .set_powergating_state = gfx_v8_0_set_powergating_state,
7133         .get_clockgating_state = gfx_v8_0_get_clockgating_state,
7134 };
7135
7136 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
7137         .type = AMDGPU_RING_TYPE_GFX,
7138         .align_mask = 0xff,
7139         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7140         .support_64bit_ptrs = false,
7141         .get_rptr = gfx_v8_0_ring_get_rptr,
7142         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
7143         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
7144         .emit_frame_size = /* maximum 215dw if counting 16 IBs in */
7145                 5 +  /* COND_EXEC */
7146                 7 +  /* PIPELINE_SYNC */
7147                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
7148                 8 +  /* FENCE for VM_FLUSH */
7149                 20 + /* GDS switch */
7150                 4 + /* double SWITCH_BUFFER;
7151                        the first COND_EXEC jumps to the place just
7152                        prior to this double SWITCH_BUFFER */
7153                 5 + /* COND_EXEC */
7154                 7 + /* HDP_flush */
7155                 4 + /* VGT_flush */
7156                 14 + /* CE_META */
7157                 31 + /* DE_META */
7158                 3 + /* CNTX_CTRL */
7159                 5 + /* HDP_INVL */
7160                 8 + 8 + /* FENCE x2 */
7161                 2, /* SWITCH_BUFFER */
7162         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
7163         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
7164         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
7165         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7166         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7167         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7168         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7169         .test_ring = gfx_v8_0_ring_test_ring,
7170         .test_ib = gfx_v8_0_ring_test_ib,
7171         .insert_nop = amdgpu_ring_insert_nop,
7172         .pad_ib = amdgpu_ring_generic_pad_ib,
7173         .emit_switch_buffer = gfx_v8_ring_emit_sb,
7174         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
7175         .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
7176         .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
7177         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7178 };
7179
7180 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
7181         .type = AMDGPU_RING_TYPE_COMPUTE,
7182         .align_mask = 0xff,
7183         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7184         .support_64bit_ptrs = false,
7185         .get_rptr = gfx_v8_0_ring_get_rptr,
7186         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7187         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7188         .emit_frame_size =
7189                 20 + /* gfx_v8_0_ring_emit_gds_switch */
7190                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7191                 5 + /* hdp_invalidate */
7192                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7193                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
7194                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7195         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7196         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7197         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
7198         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7199         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7200         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7201         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7202         .test_ring = gfx_v8_0_ring_test_ring,
7203         .test_ib = gfx_v8_0_ring_test_ib,
7204         .insert_nop = amdgpu_ring_insert_nop,
7205         .pad_ib = amdgpu_ring_generic_pad_ib,
7206         .set_priority = gfx_v8_0_ring_set_priority_compute,
7207         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7208 };
7209
7210 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7211         .type = AMDGPU_RING_TYPE_KIQ,
7212         .align_mask = 0xff,
7213         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7214         .support_64bit_ptrs = false,
7215         .get_rptr = gfx_v8_0_ring_get_rptr,
7216         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7217         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7218         .emit_frame_size =
7219                 20 + /* gfx_v8_0_ring_emit_gds_switch */
7220                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7221                 5 + /* hdp_invalidate */
7222                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7223                 17 + /* gfx_v8_0_ring_emit_vm_flush */
7224                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7225         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7226         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7227         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7228         .test_ring = gfx_v8_0_ring_test_ring,
7229         .test_ib = gfx_v8_0_ring_test_ib,
7230         .insert_nop = amdgpu_ring_insert_nop,
7231         .pad_ib = amdgpu_ring_generic_pad_ib,
7232         .emit_rreg = gfx_v8_0_ring_emit_rreg,
7233         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7234 };
7235
7236 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7237 {
7238         int i;
7239
7240         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7241
7242         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7243                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7244
7245         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7246                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7247 }
7248
7249 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7250         .set = gfx_v8_0_set_eop_interrupt_state,
7251         .process = gfx_v8_0_eop_irq,
7252 };
7253
7254 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7255         .set = gfx_v8_0_set_priv_reg_fault_state,
7256         .process = gfx_v8_0_priv_reg_irq,
7257 };
7258
7259 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7260         .set = gfx_v8_0_set_priv_inst_fault_state,
7261         .process = gfx_v8_0_priv_inst_irq,
7262 };
7263
7264 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7265         .set = gfx_v8_0_kiq_set_interrupt_state,
7266         .process = gfx_v8_0_kiq_irq,
7267 };
7268
7269 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7270         .set = gfx_v8_0_set_cp_ecc_int_state,
7271         .process = gfx_v8_0_cp_ecc_error_irq,
7272 };
7273
7274 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7275         .set = gfx_v8_0_set_sq_int_state,
7276         .process = gfx_v8_0_sq_irq,
7277 };
7278
7279 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7280 {
7281         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7282         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7283
7284         adev->gfx.priv_reg_irq.num_types = 1;
7285         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7286
7287         adev->gfx.priv_inst_irq.num_types = 1;
7288         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7289
7290         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7291         adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7292
7293         adev->gfx.cp_ecc_error_irq.num_types = 1;
7294         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7295
7296         adev->gfx.sq_irq.num_types = 1;
7297         adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7298 }
7299
7300 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7301 {
7302         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7303 }
7304
7305 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7306 {
7307         /* init asic gds info */
7308         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7309         adev->gds.gws.total_size = 64;
7310         adev->gds.oa.total_size = 16;
7311
7312         if (adev->gds.mem.total_size == 64 * 1024) {
7313                 adev->gds.mem.gfx_partition_size = 4096;
7314                 adev->gds.mem.cs_partition_size = 4096;
7315
7316                 adev->gds.gws.gfx_partition_size = 4;
7317                 adev->gds.gws.cs_partition_size = 4;
7318
7319                 adev->gds.oa.gfx_partition_size = 4;
7320                 adev->gds.oa.cs_partition_size = 1;
7321         } else {
7322                 adev->gds.mem.gfx_partition_size = 1024;
7323                 adev->gds.mem.cs_partition_size = 1024;
7324
7325                 adev->gds.gws.gfx_partition_size = 16;
7326                 adev->gds.gws.cs_partition_size = 16;
7327
7328                 adev->gds.oa.gfx_partition_size = 4;
7329                 adev->gds.oa.cs_partition_size = 4;
7330         }
7331 }
7332
7333 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7334                                                  u32 bitmap)
7335 {
7336         u32 data;
7337
7338         if (!bitmap)
7339                 return;
7340
7341         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7342         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7343
7344         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7345 }
7346
7347 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7348 {
7349         u32 data, mask;
7350
7351         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7352                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7353
7354         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7355
7356         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7357 }
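
/*
 * Worked example (annotation): with max_cu_per_sh = 8 the mask is
 * 0xff; if the combined INACTIVE_CUS field reads 0x03 (CUs 0 and 1
 * fused off or user-disabled), the active bitmap returned is
 * ~0x03 & 0xff = 0xfc.
 */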
7358
7359 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7360 {
7361         int i, j, k, counter, active_cu_number = 0;
7362         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7363         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7364         unsigned disable_masks[4 * 2];
7365         u32 ao_cu_num;
7366
7367         memset(cu_info, 0, sizeof(*cu_info));
7368
7369         if (adev->flags & AMD_IS_APU)
7370                 ao_cu_num = 2;
7371         else
7372                 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7373
7374         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7375
7376         mutex_lock(&adev->grbm_idx_mutex);
7377         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7378                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7379                         mask = 1;
7380                         ao_bitmap = 0;
7381                         counter = 0;
7382                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7383                         if (i < 4 && j < 2)
7384                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7385                                         adev, disable_masks[i * 2 + j]);
7386                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7387                         cu_info->bitmap[i][j] = bitmap;
7388
7389                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7390                                 if (bitmap & mask) {
7391                                         if (counter < ao_cu_num)
7392                                                 ao_bitmap |= mask;
7393                                         counter++;
7394                                 }
7395                                 mask <<= 1;
7396                         }
7397                         active_cu_number += counter;
7398                         if (i < 2 && j < 2)
7399                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7400                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7401                 }
7402         }
7403         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7404         mutex_unlock(&adev->grbm_idx_mutex);
7405
7406         cu_info->number = active_cu_number;
7407         cu_info->ao_cu_mask = ao_cu_mask;
7408         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7409         cu_info->max_waves_per_simd = 10;
7410         cu_info->max_scratch_slots_per_cu = 32;
7411         cu_info->wave_front_size = 64;
7412         cu_info->lds_size = 64;
7413 }
7414
7415 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7416 {
7417         .type = AMD_IP_BLOCK_TYPE_GFX,
7418         .major = 8,
7419         .minor = 0,
7420         .rev = 0,
7421         .funcs = &gfx_v8_0_ip_funcs,
7422 };
7423
7424 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7425 {
7426         .type = AMD_IP_BLOCK_TYPE_GFX,
7427         .major = 8,
7428         .minor = 1,
7429         .rev = 0,
7430         .funcs = &gfx_v8_0_ip_funcs,
7431 };
7432
7433 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7434 {
7435         uint64_t ce_payload_addr;
7436         int cnt_ce;
7437         union {
7438                 struct vi_ce_ib_state regular;
7439                 struct vi_ce_ib_state_chained_ib chained;
7440         } ce_payload = {};
7441
7442         if (ring->adev->virt.chained_ib_support) {
7443                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7444                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7445                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7446         } else {
7447                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7448                         offsetof(struct vi_gfx_meta_data, ce_payload);
7449                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7450         }
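	/*
	 * Annotation: the "+ 4 - 2" above adds the four non-payload
	 * dwords of the WRITE_DATA packet (header, control, address
	 * lo/hi) and subtracts the two dwords that a PACKET3 count
	 * field excludes.
	 */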
7451
7452         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7453         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7454                                 WRITE_DATA_DST_SEL(8) |
7455                                 WR_CONFIRM) |
7456                                 WRITE_DATA_CACHE_POLICY(0));
7457         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7458         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7459         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7460 }
7461
7462 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7463 {
7464         uint64_t de_payload_addr, gds_addr, csa_addr;
7465         int cnt_de;
7466         union {
7467                 struct vi_de_ib_state regular;
7468                 struct vi_de_ib_state_chained_ib chained;
7469         } de_payload = {};
7470
7471         csa_addr = amdgpu_csa_vaddr(ring->adev);
7472         gds_addr = csa_addr + 4096;
7473         if (ring->adev->virt.chained_ib_support) {
7474                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7475                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7476                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7477                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7478         } else {
7479                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7480                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7481                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7482                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7483         }
7484
7485         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7486         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7487                                 WRITE_DATA_DST_SEL(8) |
7488                                 WR_CONFIRM) |
7489                                 WRITE_DATA_CACHE_POLICY(0));
7490         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7491         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7492         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7493 }