/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (gitweb navigation residue removed) */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "vi.h"
29 #include "vi_structs.h"
30 #include "vid.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
35
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
38
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
41
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
54 #define GFX8_NUM_GFX_RINGS     1
55 #define GFX8_MEC_HPD_SIZE 2048
56
57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
61
62 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
63 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
64 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
65 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
66 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
67 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
68 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
69 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
70 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
71
72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
77 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
78
79 /* BPM SERDES CMD */
80 #define SET_BPM_SERDES_CMD    1
81 #define CLE_BPM_SERDES_CMD    0
82
83 /* BPM Register Address*/
/*
 * BPM (per-CU power-management SERDES) register indices, used together with
 * SET_BPM_SERDES_CMD/CLE_BPM_SERDES_CMD when toggling clock-gating overrides.
 */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX            /* number of BPM registers (sentinel) */
};
92
93 #define RLC_FormatDirectRegListLength        14
94
95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
101
102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
107
108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
114
115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
120
121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
127
128 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
133 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
134 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
135 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
139
140 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
145 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
147 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
151
152 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
153 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
155 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
163
/*
 * Per-VMID GDS register offsets: {BASE, SIZE, GWS, OA} for each of the 16
 * VMIDs.  Indexed by VMID when programming GDS partitions (users of this
 * table are outside this chunk).
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
183
/*
 * "Golden" register settings for Tonga rev A11, stored as flat
 * {register, AND-mask, value} triplets.  Presumably applied during init via
 * the driver's register-sequence programming helper — the consumer is not
 * visible in this chunk; confirm against the golden-init path.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
203
/*
 * Tonga common golden settings ({register, mask, value} triplets) applied to
 * all revisions: raster config, GB_ADDR_CONFIG (matches
 * TONGA_GB_ADDR_CONFIG_GOLDEN) and SPI CU resource reservations.
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
215
/*
 * Tonga MGCG/CGCG (medium-/coarse-grain clock gating) init sequence:
 * {register, mask, value} triplets.  The mmGRBM_GFX_INDEX 0xe0000000 entries
 * appear to broadcast the writes to all SEs/SHs/instances; per-CU CGTS
 * entries follow the second broadcast select.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU CGTS settings, CU0..CU7 */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
294
/*
 * Golden register settings for Polaris11 rev A11:
 * {register, AND-mask, value} triplets.
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
315
/*
 * Polaris11 common golden settings; GB_ADDR_CONFIG value matches
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002).
 */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
325
/*
 * Golden register settings for Polaris10 rev A11:
 * {register, AND-mask, value} triplets.
 */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
346
/*
 * Polaris10 common golden settings: raster config, GB_ADDR_CONFIG and SPI CU
 * resource reservations ({register, mask, value} triplets).
 */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
358
/*
 * Fiji common golden settings ({register, mask, value} triplets).
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
372
/*
 * Golden register settings for Fiji rev A10:
 * {register, AND-mask, value} triplets.
 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
387
/*
 * Fiji MGCG/CGCG clock-gating init sequence ({register, mask, value}
 * triplets).  Unlike Tonga's table there are no per-CU CGTS entries here.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
426
/*
 * Golden register settings for Iceland (Topaz) rev A11:
 * {register, AND-mask, value} triplets.
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
446
/*
 * Iceland common golden settings; GB_ADDR_CONFIG value matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
458
/*
 * Iceland MGCG/CGCG clock-gating init sequence ({register, mask, value}
 * triplets).  Per-CU CGTS entries cover CU0..CU5 only (fewer CUs than
 * Tonga/Carrizo tables).
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU CGTS settings, CU0..CU5 */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
526
/*
 * Golden register settings for Carrizo rev A11:
 * {register, AND-mask, value} triplets.
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
542
/*
 * Carrizo common golden settings; GB_ADDR_CONFIG value matches
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
554
/*
 * Carrizo MGCG/CGCG clock-gating init sequence ({register, mask, value}
 * triplets).  Per-CU CGTS entries cover CU0..CU7; note the final
 * RLC_CGCG_CGLS_CTRL value is 0x0020003f here (vs 0x0020003c on
 * Tonga/Fiji/Iceland).
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU CGTS settings, CU0..CU7 */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
633
/* Stoney "golden" register overrides, stored as {register, mask, value}
 * triples and applied via amdgpu_program_register_sequence() at init time.
 * Only the bits set in the mask are replaced by the value.
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
647
/* Stoney common golden settings ({register, mask, value} triples): raster
 * config, address config and per-CU SPI resource reservations.
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
659
/* Stoney medium-grain / coarse-grain clock-gating init values
 * ({register, mask, value} triples).
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
668
669 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
670 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
671 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
672 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
673 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
674 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
675 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
676 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
677
/**
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 * @adev: amdgpu device pointer
 *
 * Applies the clock-gating init, golden-settings and common golden-register
 * tables (each a list of {register, mask, value} triples) that match the
 * detected ASIC.  Unknown ASIC types are silently skipped.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* NOTE(review): board-specific workaround keyed on PCI
		 * revision/subsystem IDs; the I2C writes presumably retune an
		 * on-board component for these three designs — the exact
		 * meaning of the magic bytes is not derivable from this file.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
766
/**
 * gfx_v8_0_scratch_init - set up the pool of CP scratch registers
 * @adev: amdgpu device pointer
 *
 * Exposes 8 scratch registers starting at mmSCRATCH_REG0 and marks them
 * all free (one bit per register in free_mask).
 */
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
773
774 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
775 {
776         struct amdgpu_device *adev = ring->adev;
777         uint32_t scratch;
778         uint32_t tmp = 0;
779         unsigned i;
780         int r;
781
782         r = amdgpu_gfx_scratch_get(adev, &scratch);
783         if (r) {
784                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
785                 return r;
786         }
787         WREG32(scratch, 0xCAFEDEAD);
788         r = amdgpu_ring_alloc(ring, 3);
789         if (r) {
790                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
791                           ring->idx, r);
792                 amdgpu_gfx_scratch_free(adev, scratch);
793                 return r;
794         }
795         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
796         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
797         amdgpu_ring_write(ring, 0xDEADBEEF);
798         amdgpu_ring_commit(ring);
799
800         for (i = 0; i < adev->usec_timeout; i++) {
801                 tmp = RREG32(scratch);
802                 if (tmp == 0xDEADBEEF)
803                         break;
804                 DRM_UDELAY(1);
805         }
806         if (i < adev->usec_timeout) {
807                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
808                          ring->idx, i);
809         } else {
810                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
811                           ring->idx, scratch, tmp);
812                 r = -EINVAL;
813         }
814         amdgpu_gfx_scratch_free(adev, scratch);
815         return r;
816 }
817
/**
 * gfx_v8_0_ring_test_ib - test indirect-buffer submission on a ring
 * @ring: the ring to test
 * @timeout: fence wait timeout in jiffies, passed to dma_fence_wait_timeout()
 *
 * Seeds a scratch register with a sentinel, builds a 3-dword IB that writes
 * 0xDEADBEEF to it, schedules the IB on the ring and waits on the resulting
 * fence.  Success is confirmed by reading the new value back.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence did not signal in time,
 * -EINVAL if the scratch register was not updated, or another negative
 * error code from the allocation/submission/wait paths.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* Sentinel value the IB must overwrite. */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* SET_UCONFIG_REG packet writing 0xDEADBEEF into the scratch reg. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		/* dma_fence_wait_timeout() returns 0 on timeout. */
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	/* Teardown order matters: IB, then fence, then the scratch reg. */
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
873
874
875 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
876 {
877         release_firmware(adev->gfx.pfp_fw);
878         adev->gfx.pfp_fw = NULL;
879         release_firmware(adev->gfx.me_fw);
880         adev->gfx.me_fw = NULL;
881         release_firmware(adev->gfx.ce_fw);
882         adev->gfx.ce_fw = NULL;
883         release_firmware(adev->gfx.rlc_fw);
884         adev->gfx.rlc_fw = NULL;
885         release_firmware(adev->gfx.mec_fw);
886         adev->gfx.mec_fw = NULL;
887         if ((adev->asic_type != CHIP_STONEY) &&
888             (adev->asic_type != CHIP_TOPAZ))
889                 release_firmware(adev->gfx.mec2_fw);
890         adev->gfx.mec2_fw = NULL;
891
892         kfree(adev->gfx.rlc.register_list_format);
893 }
894
895 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
896 {
897         const char *chip_name;
898         char fw_name[30];
899         int err;
900         struct amdgpu_firmware_info *info = NULL;
901         const struct common_firmware_header *header = NULL;
902         const struct gfx_firmware_header_v1_0 *cp_hdr;
903         const struct rlc_firmware_header_v2_0 *rlc_hdr;
904         unsigned int *tmp = NULL, i;
905
906         DRM_DEBUG("\n");
907
908         switch (adev->asic_type) {
909         case CHIP_TOPAZ:
910                 chip_name = "topaz";
911                 break;
912         case CHIP_TONGA:
913                 chip_name = "tonga";
914                 break;
915         case CHIP_CARRIZO:
916                 chip_name = "carrizo";
917                 break;
918         case CHIP_FIJI:
919                 chip_name = "fiji";
920                 break;
921         case CHIP_POLARIS11:
922                 chip_name = "polaris11";
923                 break;
924         case CHIP_POLARIS10:
925                 chip_name = "polaris10";
926                 break;
927         case CHIP_POLARIS12:
928                 chip_name = "polaris12";
929                 break;
930         case CHIP_STONEY:
931                 chip_name = "stoney";
932                 break;
933         default:
934                 BUG();
935         }
936
937         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
938                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
939                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
940                 if (err == -ENOENT) {
941                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
942                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
943                 }
944         } else {
945                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
946                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
947         }
948         if (err)
949                 goto out;
950         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
951         if (err)
952                 goto out;
953         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
954         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
955         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
956
957         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
958                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
959                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
960                 if (err == -ENOENT) {
961                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
962                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
963                 }
964         } else {
965                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
966                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
967         }
968         if (err)
969                 goto out;
970         err = amdgpu_ucode_validate(adev->gfx.me_fw);
971         if (err)
972                 goto out;
973         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
974         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
975
976         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
977
978         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
979                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
980                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
981                 if (err == -ENOENT) {
982                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
983                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
984                 }
985         } else {
986                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
987                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
988         }
989         if (err)
990                 goto out;
991         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
992         if (err)
993                 goto out;
994         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
995         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
996         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
997
998         /*
999          * Support for MCBP/Virtualization in combination with chained IBs is
1000          * formal released on feature version #46
1001          */
1002         if (adev->gfx.ce_feature_version >= 46 &&
1003             adev->gfx.pfp_feature_version >= 46) {
1004                 adev->virt.chained_ib_support = true;
1005                 DRM_INFO("Chained IB support enabled!\n");
1006         } else
1007                 adev->virt.chained_ib_support = false;
1008
1009         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1010         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1011         if (err)
1012                 goto out;
1013         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1014         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1015         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1016         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1017
1018         adev->gfx.rlc.save_and_restore_offset =
1019                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1020         adev->gfx.rlc.clear_state_descriptor_offset =
1021                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1022         adev->gfx.rlc.avail_scratch_ram_locations =
1023                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1024         adev->gfx.rlc.reg_restore_list_size =
1025                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1026         adev->gfx.rlc.reg_list_format_start =
1027                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1028         adev->gfx.rlc.reg_list_format_separate_start =
1029                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1030         adev->gfx.rlc.starting_offsets_start =
1031                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1032         adev->gfx.rlc.reg_list_format_size_bytes =
1033                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1034         adev->gfx.rlc.reg_list_size_bytes =
1035                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1036
1037         adev->gfx.rlc.register_list_format =
1038                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1039                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1040
1041         if (!adev->gfx.rlc.register_list_format) {
1042                 err = -ENOMEM;
1043                 goto out;
1044         }
1045
1046         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1047                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1048         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1049                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1050
1051         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1052
1053         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1054                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1055         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1056                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1057
1058         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1059                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1060                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1061                 if (err == -ENOENT) {
1062                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1063                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1064                 }
1065         } else {
1066                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1067                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1068         }
1069         if (err)
1070                 goto out;
1071         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1072         if (err)
1073                 goto out;
1074         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1075         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1076         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1077
1078         if ((adev->asic_type != CHIP_STONEY) &&
1079             (adev->asic_type != CHIP_TOPAZ)) {
1080                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1081                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1082                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1083                         if (err == -ENOENT) {
1084                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1085                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1086                         }
1087                 } else {
1088                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1089                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1090                 }
1091                 if (!err) {
1092                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1093                         if (err)
1094                                 goto out;
1095                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1096                                 adev->gfx.mec2_fw->data;
1097                         adev->gfx.mec2_fw_version =
1098                                 le32_to_cpu(cp_hdr->header.ucode_version);
1099                         adev->gfx.mec2_feature_version =
1100                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1101                 } else {
1102                         err = 0;
1103                         adev->gfx.mec2_fw = NULL;
1104                 }
1105         }
1106
1107         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1108                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1109                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1110                 info->fw = adev->gfx.pfp_fw;
1111                 header = (const struct common_firmware_header *)info->fw->data;
1112                 adev->firmware.fw_size +=
1113                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1114
1115                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1116                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1117                 info->fw = adev->gfx.me_fw;
1118                 header = (const struct common_firmware_header *)info->fw->data;
1119                 adev->firmware.fw_size +=
1120                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1121
1122                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1123                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1124                 info->fw = adev->gfx.ce_fw;
1125                 header = (const struct common_firmware_header *)info->fw->data;
1126                 adev->firmware.fw_size +=
1127                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1128
1129                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1130                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1131                 info->fw = adev->gfx.rlc_fw;
1132                 header = (const struct common_firmware_header *)info->fw->data;
1133                 adev->firmware.fw_size +=
1134                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1135
1136                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1137                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1138                 info->fw = adev->gfx.mec_fw;
1139                 header = (const struct common_firmware_header *)info->fw->data;
1140                 adev->firmware.fw_size +=
1141                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1142
1143                 /* we need account JT in */
1144                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1145                 adev->firmware.fw_size +=
1146                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1147
1148                 if (amdgpu_sriov_vf(adev)) {
1149                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1150                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1151                         info->fw = adev->gfx.mec_fw;
1152                         adev->firmware.fw_size +=
1153                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1154                 }
1155
1156                 if (adev->gfx.mec2_fw) {
1157                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1158                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1159                         info->fw = adev->gfx.mec2_fw;
1160                         header = (const struct common_firmware_header *)info->fw->data;
1161                         adev->firmware.fw_size +=
1162                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1163                 }
1164
1165         }
1166
1167 out:
1168         if (err) {
1169                 dev_err(adev->dev,
1170                         "gfx8: Failed to load firmware \"%s\"\n",
1171                         fw_name);
1172                 release_firmware(adev->gfx.pfp_fw);
1173                 adev->gfx.pfp_fw = NULL;
1174                 release_firmware(adev->gfx.me_fw);
1175                 adev->gfx.me_fw = NULL;
1176                 release_firmware(adev->gfx.ce_fw);
1177                 adev->gfx.ce_fw = NULL;
1178                 release_firmware(adev->gfx.rlc_fw);
1179                 adev->gfx.rlc_fw = NULL;
1180                 release_firmware(adev->gfx.mec_fw);
1181                 adev->gfx.mec_fw = NULL;
1182                 release_firmware(adev->gfx.mec2_fw);
1183                 adev->gfx.mec2_fw = NULL;
1184         }
1185         return err;
1186 }
1187
/**
 * gfx_v8_0_get_csb_buffer - build the clear-state PM4 packet stream
 * @adev: amdgpu device pointer
 * @buffer: destination buffer (little-endian dwords); must be large enough,
 *          typically sized via gfx_v8_0_get_csb_size()
 *
 * Emits PREAMBLE/CONTEXT_CONTROL packets, copies every SECT_CONTEXT extent
 * from adev->gfx.rlc.cs_data as SET_CONTEXT_REG packets, then programs the
 * raster config and closes with END_CLEAR_STATE and a CLEAR_STATE packet.
 * Bails out silently if the clear-state data or buffer is missing, and
 * stops at the first non-context section (only SECT_CONTEXT is supported).
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* Copy each context-register extent as a SET_CONTEXT_REG packet. */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* Program PA_SC_RASTER_CONFIG/_1 from the cached SE0/SH0 values. */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1234
1235 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1236 {
1237         const __le32 *fw_data;
1238         volatile u32 *dst_ptr;
1239         int me, i, max_me = 4;
1240         u32 bo_offset = 0;
1241         u32 table_offset, table_size;
1242
1243         if (adev->asic_type == CHIP_CARRIZO)
1244                 max_me = 5;
1245
1246         /* write the cp table buffer */
1247         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1248         for (me = 0; me < max_me; me++) {
1249                 if (me == 0) {
1250                         const struct gfx_firmware_header_v1_0 *hdr =
1251                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1252                         fw_data = (const __le32 *)
1253                                 (adev->gfx.ce_fw->data +
1254                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1255                         table_offset = le32_to_cpu(hdr->jt_offset);
1256                         table_size = le32_to_cpu(hdr->jt_size);
1257                 } else if (me == 1) {
1258                         const struct gfx_firmware_header_v1_0 *hdr =
1259                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1260                         fw_data = (const __le32 *)
1261                                 (adev->gfx.pfp_fw->data +
1262                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1263                         table_offset = le32_to_cpu(hdr->jt_offset);
1264                         table_size = le32_to_cpu(hdr->jt_size);
1265                 } else if (me == 2) {
1266                         const struct gfx_firmware_header_v1_0 *hdr =
1267                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1268                         fw_data = (const __le32 *)
1269                                 (adev->gfx.me_fw->data +
1270                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1271                         table_offset = le32_to_cpu(hdr->jt_offset);
1272                         table_size = le32_to_cpu(hdr->jt_size);
1273                 } else if (me == 3) {
1274                         const struct gfx_firmware_header_v1_0 *hdr =
1275                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1276                         fw_data = (const __le32 *)
1277                                 (adev->gfx.mec_fw->data +
1278                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1279                         table_offset = le32_to_cpu(hdr->jt_offset);
1280                         table_size = le32_to_cpu(hdr->jt_size);
1281                 } else  if (me == 4) {
1282                         const struct gfx_firmware_header_v1_0 *hdr =
1283                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1284                         fw_data = (const __le32 *)
1285                                 (adev->gfx.mec2_fw->data +
1286                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1287                         table_offset = le32_to_cpu(hdr->jt_offset);
1288                         table_size = le32_to_cpu(hdr->jt_size);
1289                 }
1290
1291                 for (i = 0; i < table_size; i ++) {
1292                         dst_ptr[bo_offset + i] =
1293                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1294                 }
1295
1296                 bo_offset += table_size;
1297         }
1298 }
1299
1300 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1301 {
1302         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1303         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
1304 }
1305
1306 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1307 {
1308         volatile u32 *dst_ptr;
1309         u32 dws;
1310         const struct cs_section_def *cs_data;
1311         int r;
1312
1313         adev->gfx.rlc.cs_data = vi_cs_data;
1314
1315         cs_data = adev->gfx.rlc.cs_data;
1316
1317         if (cs_data) {
1318                 /* clear state block */
1319                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1320
1321                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1322                                               AMDGPU_GEM_DOMAIN_VRAM,
1323                                               &adev->gfx.rlc.clear_state_obj,
1324                                               &adev->gfx.rlc.clear_state_gpu_addr,
1325                                               (void **)&adev->gfx.rlc.cs_ptr);
1326                 if (r) {
1327                         dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1328                         gfx_v8_0_rlc_fini(adev);
1329                         return r;
1330                 }
1331
1332                 /* set up the cs buffer */
1333                 dst_ptr = adev->gfx.rlc.cs_ptr;
1334                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1335                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1336                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1337         }
1338
1339         if ((adev->asic_type == CHIP_CARRIZO) ||
1340             (adev->asic_type == CHIP_STONEY)) {
1341                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1342                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1343                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1344                                               &adev->gfx.rlc.cp_table_obj,
1345                                               &adev->gfx.rlc.cp_table_gpu_addr,
1346                                               (void **)&adev->gfx.rlc.cp_table_ptr);
1347                 if (r) {
1348                         dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1349                         return r;
1350                 }
1351
1352                 cz_init_cp_jump_table(adev);
1353
1354                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1355                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1356         }
1357
1358         return 0;
1359 }
1360
/* Free the MEC HPD EOP buffer object allocated by gfx_v8_0_mec_init();
 * amdgpu_bo_free_kernel() tolerates a NULL handle.
 */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1365
1366 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1367 {
1368         int r;
1369         u32 *hpd;
1370         size_t mec_hpd_size;
1371
1372         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1373
1374         /* take ownership of the relevant compute queues */
1375         amdgpu_gfx_compute_queue_acquire(adev);
1376
1377         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1378
1379         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1380                                       AMDGPU_GEM_DOMAIN_GTT,
1381                                       &adev->gfx.mec.hpd_eop_obj,
1382                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1383                                       (void **)&hpd);
1384         if (r) {
1385                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1386                 return r;
1387         }
1388
1389         memset(hpd, 0, mec_hpd_size);
1390
1391         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1392         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1393
1394         return 0;
1395 }
1396
/* Raw GCN ISA machine code copied verbatim into the indirect buffer by
 * gfx_v8_0_do_edc_gpr_workarounds() for the VGPR init dispatch.  The
 * driver never decodes these opcodes; per the name this shader initializes
 * VGPRs — TODO(review): encoding owned by the hardware team, not verified here.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1433
/* Raw GCN ISA machine code for the SGPR init shader; the same image is
 * used for both SGPR dispatches in gfx_v8_0_do_edc_gpr_workarounds()
 * (only the register state differs between the two).  Opcodes are not
 * decoded by the driver.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1458
/* (register, value) pairs programmed via PACKET3_SET_SH_REG before the
 * VGPR init dispatch in gfx_v8_0_do_edc_gpr_workarounds(); consumed two
 * entries at a time.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1478
/* (register, value) pairs for the first SGPR init dispatch.  Differs from
 * sgpr2_init_regs only in the STATIC_THREAD_MGMT_SE0 mask (0x0f vs 0xf0),
 * i.e. the two dispatches target different CU groups.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1498
/* (register, value) pairs for the second SGPR init dispatch; identical to
 * sgpr1_init_regs except for the complementary STATIC_THREAD_MGMT_SE0
 * mask (0xf0).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1518
/* EDC SEC/DED error counter registers read back (to clear them) at the end
 * of gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1547
/**
 * gfx_v8_0_do_edc_gpr_workarounds - run GPR init shaders for EDC (Carrizo)
 * @adev: amdgpu device pointer
 *
 * Builds a single indirect buffer containing three compute dispatches
 * (one VGPR init pass and two SGPR init passes), submits it on compute
 * ring 0 and waits for it, then enables EDC modes and reads the SEC/DED
 * counter registers back to clear them.
 *
 * Quietly returns 0 on non-Carrizo ASICs or when the compute ring is not
 * ready; otherwise returns 0 on success or a negative error code.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save GB_EDC_MODE and disable it while the init shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per SET_SH_REG pair, + 4 for PGM_LO/HI,
	 * + 5 for the dispatch packet, + 2 for the CS partial flush,
	 * all times 4 bytes per dword
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders into the tail of the IB */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 - reuses the same SGPR init shader (sgpr_offset) with the
	 * complementary register state from sgpr2_init_regs
	 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* restore the saved GB_EDC_MODE with DED_MODE/PROP_FED enabled */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	/* NOTE(review): clearing DIS_EDC and then OR-ing in 1 looks
	 * contradictory if DIS_EDC is bit 0 — matches upstream as-is,
	 * verify against the register spec before changing.
	 */
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1710
1711 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1712 {
1713         u32 gb_addr_config;
1714         u32 mc_shared_chmap, mc_arb_ramcfg;
1715         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1716         u32 tmp;
1717         int ret;
1718
1719         switch (adev->asic_type) {
1720         case CHIP_TOPAZ:
1721                 adev->gfx.config.max_shader_engines = 1;
1722                 adev->gfx.config.max_tile_pipes = 2;
1723                 adev->gfx.config.max_cu_per_sh = 6;
1724                 adev->gfx.config.max_sh_per_se = 1;
1725                 adev->gfx.config.max_backends_per_se = 2;
1726                 adev->gfx.config.max_texture_channel_caches = 2;
1727                 adev->gfx.config.max_gprs = 256;
1728                 adev->gfx.config.max_gs_threads = 32;
1729                 adev->gfx.config.max_hw_contexts = 8;
1730
1731                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1732                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1733                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1734                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1735                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1736                 break;
1737         case CHIP_FIJI:
1738                 adev->gfx.config.max_shader_engines = 4;
1739                 adev->gfx.config.max_tile_pipes = 16;
1740                 adev->gfx.config.max_cu_per_sh = 16;
1741                 adev->gfx.config.max_sh_per_se = 1;
1742                 adev->gfx.config.max_backends_per_se = 4;
1743                 adev->gfx.config.max_texture_channel_caches = 16;
1744                 adev->gfx.config.max_gprs = 256;
1745                 adev->gfx.config.max_gs_threads = 32;
1746                 adev->gfx.config.max_hw_contexts = 8;
1747
1748                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1749                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1750                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1751                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1752                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1753                 break;
1754         case CHIP_POLARIS11:
1755         case CHIP_POLARIS12:
1756                 ret = amdgpu_atombios_get_gfx_info(adev);
1757                 if (ret)
1758                         return ret;
1759                 adev->gfx.config.max_gprs = 256;
1760                 adev->gfx.config.max_gs_threads = 32;
1761                 adev->gfx.config.max_hw_contexts = 8;
1762
1763                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1764                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1765                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1766                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1767                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1768                 break;
1769         case CHIP_POLARIS10:
1770                 ret = amdgpu_atombios_get_gfx_info(adev);
1771                 if (ret)
1772                         return ret;
1773                 adev->gfx.config.max_gprs = 256;
1774                 adev->gfx.config.max_gs_threads = 32;
1775                 adev->gfx.config.max_hw_contexts = 8;
1776
1777                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1778                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1779                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1780                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1781                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1782                 break;
1783         case CHIP_TONGA:
1784                 adev->gfx.config.max_shader_engines = 4;
1785                 adev->gfx.config.max_tile_pipes = 8;
1786                 adev->gfx.config.max_cu_per_sh = 8;
1787                 adev->gfx.config.max_sh_per_se = 1;
1788                 adev->gfx.config.max_backends_per_se = 2;
1789                 adev->gfx.config.max_texture_channel_caches = 8;
1790                 adev->gfx.config.max_gprs = 256;
1791                 adev->gfx.config.max_gs_threads = 32;
1792                 adev->gfx.config.max_hw_contexts = 8;
1793
1794                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1799                 break;
1800         case CHIP_CARRIZO:
1801                 adev->gfx.config.max_shader_engines = 1;
1802                 adev->gfx.config.max_tile_pipes = 2;
1803                 adev->gfx.config.max_sh_per_se = 1;
1804                 adev->gfx.config.max_backends_per_se = 2;
1805                 adev->gfx.config.max_cu_per_sh = 8;
1806                 adev->gfx.config.max_texture_channel_caches = 2;
1807                 adev->gfx.config.max_gprs = 256;
1808                 adev->gfx.config.max_gs_threads = 32;
1809                 adev->gfx.config.max_hw_contexts = 8;
1810
1811                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1816                 break;
1817         case CHIP_STONEY:
1818                 adev->gfx.config.max_shader_engines = 1;
1819                 adev->gfx.config.max_tile_pipes = 2;
1820                 adev->gfx.config.max_sh_per_se = 1;
1821                 adev->gfx.config.max_backends_per_se = 1;
1822                 adev->gfx.config.max_cu_per_sh = 3;
1823                 adev->gfx.config.max_texture_channel_caches = 2;
1824                 adev->gfx.config.max_gprs = 256;
1825                 adev->gfx.config.max_gs_threads = 16;
1826                 adev->gfx.config.max_hw_contexts = 8;
1827
1828                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1833                 break;
1834         default:
1835                 adev->gfx.config.max_shader_engines = 2;
1836                 adev->gfx.config.max_tile_pipes = 4;
1837                 adev->gfx.config.max_cu_per_sh = 2;
1838                 adev->gfx.config.max_sh_per_se = 1;
1839                 adev->gfx.config.max_backends_per_se = 2;
1840                 adev->gfx.config.max_texture_channel_caches = 4;
1841                 adev->gfx.config.max_gprs = 256;
1842                 adev->gfx.config.max_gs_threads = 32;
1843                 adev->gfx.config.max_hw_contexts = 8;
1844
1845                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1846                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1847                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1848                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1849                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1850                 break;
1851         }
1852
1853         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1854         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1855         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1856
1857         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1858         adev->gfx.config.mem_max_burst_length_bytes = 256;
1859         if (adev->flags & AMD_IS_APU) {
1860                 /* Get memory bank mapping mode. */
1861                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1862                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1863                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1864
1865                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1866                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1867                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1868
1869                 /* Validate settings in case only one DIMM installed. */
1870                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1871                         dimm00_addr_map = 0;
1872                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1873                         dimm01_addr_map = 0;
1874                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1875                         dimm10_addr_map = 0;
1876                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1877                         dimm11_addr_map = 0;
1878
1879                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1880                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1881                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1882                         adev->gfx.config.mem_row_size_in_kb = 2;
1883                 else
1884                         adev->gfx.config.mem_row_size_in_kb = 1;
1885         } else {
1886                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1887                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1888                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1889                         adev->gfx.config.mem_row_size_in_kb = 4;
1890         }
1891
1892         adev->gfx.config.shader_engine_tile_size = 32;
1893         adev->gfx.config.num_gpus = 1;
1894         adev->gfx.config.multi_gpu_tile_size = 64;
1895
1896         /* fix up row size */
1897         switch (adev->gfx.config.mem_row_size_in_kb) {
1898         case 1:
1899         default:
1900                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1901                 break;
1902         case 2:
1903                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1904                 break;
1905         case 4:
1906                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1907                 break;
1908         }
1909         adev->gfx.config.gb_addr_config = gb_addr_config;
1910
1911         return 0;
1912 }
1913
1914 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1915                                         int mec, int pipe, int queue)
1916 {
1917         int r;
1918         unsigned irq_type;
1919         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1920
1921         ring = &adev->gfx.compute_ring[ring_id];
1922
1923         /* mec0 is me1 */
1924         ring->me = mec + 1;
1925         ring->pipe = pipe;
1926         ring->queue = queue;
1927
1928         ring->ring_obj = NULL;
1929         ring->use_doorbell = true;
1930         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1931         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1932                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1933         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1934
1935         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1936                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1937                 + ring->pipe;
1938
1939         /* type-2 packets are deprecated on MEC, use type-3 instead */
1940         r = amdgpu_ring_init(adev, ring, 1024,
1941                         &adev->gfx.eop_irq, irq_type);
1942         if (r)
1943                 return r;
1944
1945
1946         return 0;
1947 }
1948
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block.
 *
 * @handle: opaque IP-block handle, actually a struct amdgpu_device *.
 *
 * Registers interrupt sources, loads microcode, allocates RLC/MEC/KIQ
 * buffer objects, initializes the gfx and compute rings, and reserves
 * the GDS/GWS/OA partitions.  The call order matters: interrupt ids
 * must exist before rings are initialized against them, and the KIQ /
 * MQD setup follows the compute-ring creation.
 *
 * Returns 0 on success or a negative error code; on error no explicit
 * unwinding is done here (teardown is handled by sw_fini).
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs depends on the ASIC generation */
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				/* skip queues not assigned to amdgpu (e.g. reserved for KFD) */
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2099
2100 static int gfx_v8_0_sw_fini(void *handle)
2101 {
2102         int i;
2103         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2104
2105         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2106         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2107         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2108
2109         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2110                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2111         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2112                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2113
2114         amdgpu_gfx_compute_mqd_sw_fini(adev);
2115         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2116         amdgpu_gfx_kiq_fini(adev);
2117         amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL);
2118
2119         gfx_v8_0_mec_fini(adev);
2120         gfx_v8_0_rlc_fini(adev);
2121         gfx_v8_0_free_microcode(adev);
2122
2123         return 0;
2124 }
2125
2126 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2127 {
2128         uint32_t *modearray, *mod2array;
2129         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2130         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2131         u32 reg_offset;
2132
2133         modearray = adev->gfx.config.tile_mode_array;
2134         mod2array = adev->gfx.config.macrotile_mode_array;
2135
2136         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2137                 modearray[reg_offset] = 0;
2138
2139         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2140                 mod2array[reg_offset] = 0;
2141
2142         switch (adev->asic_type) {
2143         case CHIP_TOPAZ:
2144                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145                                 PIPE_CONFIG(ADDR_SURF_P2) |
2146                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2147                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2148                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2149                                 PIPE_CONFIG(ADDR_SURF_P2) |
2150                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2151                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2152                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153                                 PIPE_CONFIG(ADDR_SURF_P2) |
2154                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2155                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2156                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157                                 PIPE_CONFIG(ADDR_SURF_P2) |
2158                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2159                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161                                 PIPE_CONFIG(ADDR_SURF_P2) |
2162                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2163                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2165                                 PIPE_CONFIG(ADDR_SURF_P2) |
2166                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2167                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2168                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2169                                 PIPE_CONFIG(ADDR_SURF_P2) |
2170                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2171                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2172                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2173                                 PIPE_CONFIG(ADDR_SURF_P2));
2174                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2175                                 PIPE_CONFIG(ADDR_SURF_P2) |
2176                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2177                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2178                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2179                                  PIPE_CONFIG(ADDR_SURF_P2) |
2180                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2181                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2183                                  PIPE_CONFIG(ADDR_SURF_P2) |
2184                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2186                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2187                                  PIPE_CONFIG(ADDR_SURF_P2) |
2188                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2189                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2191                                  PIPE_CONFIG(ADDR_SURF_P2) |
2192                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2193                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2194                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2195                                  PIPE_CONFIG(ADDR_SURF_P2) |
2196                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2197                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2198                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2199                                  PIPE_CONFIG(ADDR_SURF_P2) |
2200                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2202                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2203                                  PIPE_CONFIG(ADDR_SURF_P2) |
2204                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2205                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2206                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2207                                  PIPE_CONFIG(ADDR_SURF_P2) |
2208                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2209                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2210                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2211                                  PIPE_CONFIG(ADDR_SURF_P2) |
2212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2215                                  PIPE_CONFIG(ADDR_SURF_P2) |
2216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2219                                  PIPE_CONFIG(ADDR_SURF_P2) |
2220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2223                                  PIPE_CONFIG(ADDR_SURF_P2) |
2224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2227                                  PIPE_CONFIG(ADDR_SURF_P2) |
2228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2230                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2231                                  PIPE_CONFIG(ADDR_SURF_P2) |
2232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2234                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2235                                  PIPE_CONFIG(ADDR_SURF_P2) |
2236                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2237                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2238                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2239                                  PIPE_CONFIG(ADDR_SURF_P2) |
2240                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2241                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2242                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2243                                  PIPE_CONFIG(ADDR_SURF_P2) |
2244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2246
2247                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2248                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2249                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2250                                 NUM_BANKS(ADDR_SURF_8_BANK));
2251                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2252                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2253                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2254                                 NUM_BANKS(ADDR_SURF_8_BANK));
2255                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2256                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2257                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258                                 NUM_BANKS(ADDR_SURF_8_BANK));
2259                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2260                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2262                                 NUM_BANKS(ADDR_SURF_8_BANK));
2263                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2264                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2265                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2266                                 NUM_BANKS(ADDR_SURF_8_BANK));
2267                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2270                                 NUM_BANKS(ADDR_SURF_8_BANK));
2271                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2273                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2274                                 NUM_BANKS(ADDR_SURF_8_BANK));
2275                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2276                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2277                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2278                                 NUM_BANKS(ADDR_SURF_16_BANK));
2279                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2280                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2281                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282                                 NUM_BANKS(ADDR_SURF_16_BANK));
2283                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2284                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2285                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286                                  NUM_BANKS(ADDR_SURF_16_BANK));
2287                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2288                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2289                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290                                  NUM_BANKS(ADDR_SURF_16_BANK));
2291                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294                                  NUM_BANKS(ADDR_SURF_16_BANK));
2295                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2296                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2297                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2298                                  NUM_BANKS(ADDR_SURF_16_BANK));
2299                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2300                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2301                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2302                                  NUM_BANKS(ADDR_SURF_8_BANK));
2303
2304                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2305                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2306                             reg_offset != 23)
2307                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2308
2309                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2310                         if (reg_offset != 7)
2311                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2312
2313                 break;
2314         case CHIP_FIJI:
2315                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2318                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2319                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2322                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2330                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2336                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2338                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2340                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2342                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2344                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2345                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2348                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2349                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2350                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2352                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2353                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2354                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2357                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2358                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2361                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2362                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2365                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2366                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2369                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2374                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2378                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2381                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2382                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2383                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2385                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2386                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2389                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2390                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2392                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2394                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2398                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2402                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2406                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2407                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2410                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2412                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2414                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2416                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2418                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2420                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2422                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2424                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2426                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2428                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2430                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2433                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2434                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2435                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2437
2438                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2441                                 NUM_BANKS(ADDR_SURF_8_BANK));
2442                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2444                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445                                 NUM_BANKS(ADDR_SURF_8_BANK));
2446                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449                                 NUM_BANKS(ADDR_SURF_8_BANK));
2450                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453                                 NUM_BANKS(ADDR_SURF_8_BANK));
2454                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2456                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2457                                 NUM_BANKS(ADDR_SURF_8_BANK));
2458                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2460                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2461                                 NUM_BANKS(ADDR_SURF_8_BANK));
2462                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465                                 NUM_BANKS(ADDR_SURF_8_BANK));
2466                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2468                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2469                                 NUM_BANKS(ADDR_SURF_8_BANK));
2470                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2472                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2473                                 NUM_BANKS(ADDR_SURF_8_BANK));
2474                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2476                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477                                  NUM_BANKS(ADDR_SURF_8_BANK));
2478                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2481                                  NUM_BANKS(ADDR_SURF_8_BANK));
2482                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2484                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2485                                  NUM_BANKS(ADDR_SURF_8_BANK));
2486                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489                                  NUM_BANKS(ADDR_SURF_8_BANK));
2490                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2493                                  NUM_BANKS(ADDR_SURF_4_BANK));
2494
2495                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2496                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2497
2498                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2499                         if (reg_offset != 7)
2500                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2501
2502                 break;
	case CHIP_TONGA:
		/*
		 * Tonga tiling tables: modearray[0..30] are written to
		 * GB_TILE_MODE0..30 and mod2array[] to GB_MACROTILE_MODE0..14
		 * by the loops at the end of this case.  Most modes use the
		 * 8-pipe ADDR_SURF_P8_32x32_16x16 pipe config; the PRT
		 * (partially-resident texture) variants that fall back to
		 * ADDR_SURF_P4_16x16 are the "*_PRT_TILED_*" entries below.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* mode 8 is linear-aligned: no tile split / micro-tile fields */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macro-tile (bank) settings.  Index 7 is deliberately never
		 * assigned: the write loop below skips reg_offset 7, so
		 * GB_MACROTILE_MODE7 is left at its existing value.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Push the tables into the hardware registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* reg_offset 7 is skipped — mod2array[7] was never set above */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/*
		 * Polaris11/Polaris12 tiling tables: written to
		 * GB_TILE_MODE0..30 and GB_MACROTILE_MODE0..14 by the loops
		 * at the end of this case.  Every tile mode here uses the
		 * 4-pipe ADDR_SURF_P4_16x16 pipe configuration (these parts
		 * have fewer pipes than Tonga/Polaris10).
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* mode 8 is linear-aligned: no tile split / micro-tile fields */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macro-tile (bank) settings.  Index 7 is deliberately never
		 * assigned: the write loop below skips reg_offset 7, so
		 * GB_MACROTILE_MODE7 is left at its existing value.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		/* modes 8/9 use BANK_WIDTH_2 — unlike the Tonga table above */
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Push the tables into the hardware registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* reg_offset 7 is skipped — mod2array[7] was never set above */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2895         case CHIP_POLARIS10:
2896                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2899                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2900                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2903                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2908                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2916                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2919                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2920                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2929                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2930                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2931                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2932                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2933                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2934                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2935                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2936                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2940                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2942                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2943                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2944                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2945                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2946                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2947                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2948                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2950                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2951                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2955                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2959                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2962                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2964                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2966                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2967                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2968                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2969                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2970                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2971                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2973                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2974                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2975                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2977                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2978                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2979                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2981                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2982                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2983                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2985                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2986                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2987                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2988                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2989                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2990                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2991                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2992                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2993                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2994                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2995                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2996                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2997                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2998                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2999                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3000                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3001                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3002                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3004                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3005                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3006                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3007                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3008                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3011                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3012                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3013                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3014                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3015                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3016                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3017                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3018
3019                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3021                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022                                 NUM_BANKS(ADDR_SURF_16_BANK));
3023
3024                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3026                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027                                 NUM_BANKS(ADDR_SURF_16_BANK));
3028
3029                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3031                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032                                 NUM_BANKS(ADDR_SURF_16_BANK));
3033
3034                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037                                 NUM_BANKS(ADDR_SURF_16_BANK));
3038
3039                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3041                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042                                 NUM_BANKS(ADDR_SURF_16_BANK));
3043
3044                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3046                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3047                                 NUM_BANKS(ADDR_SURF_16_BANK));
3048
3049                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052                                 NUM_BANKS(ADDR_SURF_16_BANK));
3053
3054                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3056                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3057                                 NUM_BANKS(ADDR_SURF_16_BANK));
3058
3059                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3061                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3062                                 NUM_BANKS(ADDR_SURF_16_BANK));
3063
3064                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3065                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3066                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3067                                 NUM_BANKS(ADDR_SURF_16_BANK));
3068
3069                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3071                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3072                                 NUM_BANKS(ADDR_SURF_16_BANK));
3073
3074                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3077                                 NUM_BANKS(ADDR_SURF_8_BANK));
3078
3079                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3080                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3081                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3082                                 NUM_BANKS(ADDR_SURF_4_BANK));
3083
3084                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3087                                 NUM_BANKS(ADDR_SURF_4_BANK));
3088
3089                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3090                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3091
3092                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3093                         if (reg_offset != 7)
3094                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3095
3096                 break;
3097         case CHIP_STONEY:
3098                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3099                                 PIPE_CONFIG(ADDR_SURF_P2) |
3100                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3101                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3102                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3103                                 PIPE_CONFIG(ADDR_SURF_P2) |
3104                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3105                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3106                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107                                 PIPE_CONFIG(ADDR_SURF_P2) |
3108                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3109                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3110                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3111                                 PIPE_CONFIG(ADDR_SURF_P2) |
3112                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3113                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3114                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115                                 PIPE_CONFIG(ADDR_SURF_P2) |
3116                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3117                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3118                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3119                                 PIPE_CONFIG(ADDR_SURF_P2) |
3120                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3122                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3123                                 PIPE_CONFIG(ADDR_SURF_P2) |
3124                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3125                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3126                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3127                                 PIPE_CONFIG(ADDR_SURF_P2));
3128                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3129                                 PIPE_CONFIG(ADDR_SURF_P2) |
3130                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3131                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3132                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3133                                  PIPE_CONFIG(ADDR_SURF_P2) |
3134                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3135                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3136                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3137                                  PIPE_CONFIG(ADDR_SURF_P2) |
3138                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3139                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3140                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3141                                  PIPE_CONFIG(ADDR_SURF_P2) |
3142                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3143                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3144                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3145                                  PIPE_CONFIG(ADDR_SURF_P2) |
3146                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3147                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3148                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3149                                  PIPE_CONFIG(ADDR_SURF_P2) |
3150                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3151                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3152                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3153                                  PIPE_CONFIG(ADDR_SURF_P2) |
3154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3156                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3157                                  PIPE_CONFIG(ADDR_SURF_P2) |
3158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3160                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3161                                  PIPE_CONFIG(ADDR_SURF_P2) |
3162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3164                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3165                                  PIPE_CONFIG(ADDR_SURF_P2) |
3166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3168                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3169                                  PIPE_CONFIG(ADDR_SURF_P2) |
3170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3172                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3173                                  PIPE_CONFIG(ADDR_SURF_P2) |
3174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3176                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3177                                  PIPE_CONFIG(ADDR_SURF_P2) |
3178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3180                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3181                                  PIPE_CONFIG(ADDR_SURF_P2) |
3182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3184                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3185                                  PIPE_CONFIG(ADDR_SURF_P2) |
3186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3188                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3189                                  PIPE_CONFIG(ADDR_SURF_P2) |
3190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3192                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3193                                  PIPE_CONFIG(ADDR_SURF_P2) |
3194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3197                                  PIPE_CONFIG(ADDR_SURF_P2) |
3198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3200
3201                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3202                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3203                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3204                                 NUM_BANKS(ADDR_SURF_8_BANK));
3205                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3206                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3207                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3208                                 NUM_BANKS(ADDR_SURF_8_BANK));
3209                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3210                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3211                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3212                                 NUM_BANKS(ADDR_SURF_8_BANK));
3213                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3214                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3215                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3216                                 NUM_BANKS(ADDR_SURF_8_BANK));
3217                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3218                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3219                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3220                                 NUM_BANKS(ADDR_SURF_8_BANK));
3221                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3223                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3224                                 NUM_BANKS(ADDR_SURF_8_BANK));
3225                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3226                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3227                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3228                                 NUM_BANKS(ADDR_SURF_8_BANK));
3229                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3230                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3231                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3232                                 NUM_BANKS(ADDR_SURF_16_BANK));
3233                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3234                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3235                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3236                                 NUM_BANKS(ADDR_SURF_16_BANK));
3237                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3238                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3239                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240                                  NUM_BANKS(ADDR_SURF_16_BANK));
3241                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3242                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3243                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3244                                  NUM_BANKS(ADDR_SURF_16_BANK));
3245                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3246                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3247                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248                                  NUM_BANKS(ADDR_SURF_16_BANK));
3249                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3251                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3252                                  NUM_BANKS(ADDR_SURF_16_BANK));
3253                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3255                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3256                                  NUM_BANKS(ADDR_SURF_8_BANK));
3257
3258                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3259                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3260                             reg_offset != 23)
3261                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3262
3263                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3264                         if (reg_offset != 7)
3265                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3266
3267                 break;
3268         default:
3269                 dev_warn(adev->dev,
3270                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3271                          adev->asic_type);
3272
3273         case CHIP_CARRIZO:
3274                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3275                                 PIPE_CONFIG(ADDR_SURF_P2) |
3276                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3277                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3278                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3279                                 PIPE_CONFIG(ADDR_SURF_P2) |
3280                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3281                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3282                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283                                 PIPE_CONFIG(ADDR_SURF_P2) |
3284                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3285                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3286                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3287                                 PIPE_CONFIG(ADDR_SURF_P2) |
3288                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3289                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3290                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3291                                 PIPE_CONFIG(ADDR_SURF_P2) |
3292                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3293                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3294                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3295                                 PIPE_CONFIG(ADDR_SURF_P2) |
3296                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3298                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3299                                 PIPE_CONFIG(ADDR_SURF_P2) |
3300                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3301                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3302                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3303                                 PIPE_CONFIG(ADDR_SURF_P2));
3304                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3305                                 PIPE_CONFIG(ADDR_SURF_P2) |
3306                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3307                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3308                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3309                                  PIPE_CONFIG(ADDR_SURF_P2) |
3310                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3311                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3312                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3313                                  PIPE_CONFIG(ADDR_SURF_P2) |
3314                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3315                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3316                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3317                                  PIPE_CONFIG(ADDR_SURF_P2) |
3318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3320                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3321                                  PIPE_CONFIG(ADDR_SURF_P2) |
3322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3324                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3325                                  PIPE_CONFIG(ADDR_SURF_P2) |
3326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3328                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3329                                  PIPE_CONFIG(ADDR_SURF_P2) |
3330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3332                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3333                                  PIPE_CONFIG(ADDR_SURF_P2) |
3334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3336                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3337                                  PIPE_CONFIG(ADDR_SURF_P2) |
3338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3341                                  PIPE_CONFIG(ADDR_SURF_P2) |
3342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3344                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3345                                  PIPE_CONFIG(ADDR_SURF_P2) |
3346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3348                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3349                                  PIPE_CONFIG(ADDR_SURF_P2) |
3350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3352                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3353                                  PIPE_CONFIG(ADDR_SURF_P2) |
3354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3356                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3357                                  PIPE_CONFIG(ADDR_SURF_P2) |
3358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3360                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3361                                  PIPE_CONFIG(ADDR_SURF_P2) |
3362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3364                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3365                                  PIPE_CONFIG(ADDR_SURF_P2) |
3366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3368                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3369                                  PIPE_CONFIG(ADDR_SURF_P2) |
3370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3372                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3373                                  PIPE_CONFIG(ADDR_SURF_P2) |
3374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3376
3377                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3378                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3379                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3380                                 NUM_BANKS(ADDR_SURF_8_BANK));
3381                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3382                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3383                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384                                 NUM_BANKS(ADDR_SURF_8_BANK));
3385                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3387                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3388                                 NUM_BANKS(ADDR_SURF_8_BANK));
3389                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3391                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3392                                 NUM_BANKS(ADDR_SURF_8_BANK));
3393                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3396                                 NUM_BANKS(ADDR_SURF_8_BANK));
3397                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3399                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3400                                 NUM_BANKS(ADDR_SURF_8_BANK));
3401                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3404                                 NUM_BANKS(ADDR_SURF_8_BANK));
3405                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3406                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3407                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3408                                 NUM_BANKS(ADDR_SURF_16_BANK));
3409                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3410                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3411                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3412                                 NUM_BANKS(ADDR_SURF_16_BANK));
3413                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3414                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3415                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416                                  NUM_BANKS(ADDR_SURF_16_BANK));
3417                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3418                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3419                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3420                                  NUM_BANKS(ADDR_SURF_16_BANK));
3421                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3422                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3423                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3424                                  NUM_BANKS(ADDR_SURF_16_BANK));
3425                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3426                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3427                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3428                                  NUM_BANKS(ADDR_SURF_16_BANK));
3429                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3430                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3431                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3432                                  NUM_BANKS(ADDR_SURF_8_BANK));
3433
3434                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3435                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3436                             reg_offset != 23)
3437                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3438
3439                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3440                         if (reg_offset != 7)
3441                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3442
3443                 break;
3444         }
3445 }
3446
3447 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3448                                   u32 se_num, u32 sh_num, u32 instance)
3449 {
3450         u32 data;
3451
3452         if (instance == 0xffffffff)
3453                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3454         else
3455                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3456
3457         if (se_num == 0xffffffff)
3458                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3459         else
3460                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3461
3462         if (sh_num == 0xffffffff)
3463                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3464         else
3465                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3466
3467         WREG32(mmGRBM_GFX_INDEX, data);
3468 }
3469
3470 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3471 {
3472         u32 data, mask;
3473
3474         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3475                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3476
3477         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3478
3479         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3480                                          adev->gfx.config.max_sh_per_se);
3481
3482         return (~data) & mask;
3483 }
3484
3485 static void
3486 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3487 {
3488         switch (adev->asic_type) {
3489         case CHIP_FIJI:
3490                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3491                           RB_XSEL2(1) | PKR_MAP(2) |
3492                           PKR_XSEL(1) | PKR_YSEL(1) |
3493                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3494                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3495                            SE_PAIR_YSEL(2);
3496                 break;
3497         case CHIP_TONGA:
3498         case CHIP_POLARIS10:
3499                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3500                           SE_XSEL(1) | SE_YSEL(1);
3501                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3502                            SE_PAIR_YSEL(2);
3503                 break;
3504         case CHIP_TOPAZ:
3505         case CHIP_CARRIZO:
3506                 *rconf |= RB_MAP_PKR0(2);
3507                 *rconf1 |= 0x0;
3508                 break;
3509         case CHIP_POLARIS11:
3510         case CHIP_POLARIS12:
3511                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3512                           SE_XSEL(1) | SE_YSEL(1);
3513                 *rconf1 |= 0x0;
3514                 break;
3515         case CHIP_STONEY:
3516                 *rconf |= 0x0;
3517                 *rconf1 |= 0x0;
3518                 break;
3519         default:
3520                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3521                 break;
3522         }
3523 }
3524
/*
 * gfx_v8_0_write_harvested_raster_configs - program per-SE raster configs
 * on parts where some render backends are harvested (disabled).
 *
 * @raster_config/@raster_config_1: default config values to adjust
 * @rb_mask: bitmap of active RBs across the whole chip
 * @num_rb: total RB count used to derive per-SE/per-PKR geometry
 *
 * For each shader engine, remaps the SE/PKR/RB map fields so that work is
 * only routed to RBs that actually exist, then writes the adjusted value
 * while that SE is selected. Leaves GRBM_GFX_INDEX in broadcast mode.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice the global RB mask into one sub-mask per SE. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If a whole SE pair has no active RBs, route to the other pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx is the first SE of the pair this SE belongs to. */
		int idx = (se / 2) * 2;

		/* If one SE in the pair is dead, map to the live one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same treatment one level down, for the two packers. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* And again for the individual RBs inside each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3633
/*
 * gfx_v8_0_setup_rb - detect active render backends and program the
 * raster configuration accordingly.
 *
 * Queries every SE/SH for its active-RB bitmap, caches the combined
 * bitmap and RB count in adev->gfx.config, then writes either the
 * default raster config (nothing harvested) or the per-SE harvested
 * variant. Finally snapshots the per-SE/SH RB/raster registers so
 * userspace can query them later.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Accumulate each SE/SH's RB bitmap into one chip-wide bitmap. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* Restore broadcast mode. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		/* No harvesting to compensate for: broadcast the defaults. */
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		/* Some RBs are missing: write per-SE adjusted configs. */
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3690
/**
 * gfx_v8_0_init_compute_vmid - initialize SH_MEM registers for compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* Same base for the private (low 16 bits) and shared (high 16
	 * bits) apertures. */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* HSA64 addressing, unaligned access mode, MTYPE_CC default,
	 * private ATC bit set. */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	/* Program VMIDs 8..15, the range reserved here for compute. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base=1/limit=0: presumably disables APE1 (base >
		 * limit) -- confirm against the SH_MEM register spec. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* Back to VMID 0 before releasing the SRBM mutex. */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3735
3736 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3737 {
3738         switch (adev->asic_type) {
3739         default:
3740                 adev->gfx.config.double_offchip_lds_buf = 1;
3741                 break;
3742         case CHIP_CARRIZO:
3743         case CHIP_STONEY:
3744                 adev->gfx.config.double_offchip_lds_buf = 0;
3745                 break;
3746         }
3747 }
3748
/*
 * gfx_v8_0_gpu_init - one-time GFX block initialization
 *
 * Programs the address config registers, initializes the tiling tables,
 * render backends and CU info, sets up SH_MEM for every graphics VMID
 * and the compute VMIDs, then programs the PA_SC fifo sizes and SPI
 * arbitration priorities in broadcast mode.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	/* Program SH_MEM_* for each VMID. VMID 0 is special-cased:
	 * MTYPE_UC default and a zero SH_MEM_BASES; the others use
	 * MTYPE_NC and the shared aperture base. */
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			/* shared aperture base lives in the top 16 bits of
			 * the 64-bit address */
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* Set all four PIPE_ORDER_TS* arbitration priorities to 2. */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3830
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes masters to idle
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY for each SE/SH (up to
 * adev->usec_timeout iterations, 1us apart), then polls the SE/GC/TC0/TC1
 * non-CU master busy bits the same way. A timeout is silent: the
 * function returns without reporting an error.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* Restore broadcast before dropping the index mutex. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3860
3861 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3862                                                bool enable)
3863 {
3864         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3865
3866         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3867         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3868         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3869         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3870
3871         WREG32(mmCP_INT_CNTL_RING0, tmp);
3872 }
3873
3874 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3875 {
3876         /* csib */
3877         WREG32(mmRLC_CSIB_ADDR_HI,
3878                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3879         WREG32(mmRLC_CSIB_ADDR_LO,
3880                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3881         WREG32(mmRLC_CSIB_LENGTH,
3882                         adev->gfx.rlc.clear_state_size);
3883 }
3884
3885 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3886                                 int ind_offset,
3887                                 int list_size,
3888                                 int *unique_indices,
3889                                 int *indices_count,
3890                                 int max_indices,
3891                                 int *ind_start_offsets,
3892                                 int *offset_count,
3893                                 int max_offset)
3894 {
3895         int indices;
3896         bool new_entry = true;
3897
3898         for (; ind_offset < list_size; ind_offset++) {
3899
3900                 if (new_entry) {
3901                         new_entry = false;
3902                         ind_start_offsets[*offset_count] = ind_offset;
3903                         *offset_count = *offset_count + 1;
3904                         BUG_ON(*offset_count >= max_offset);
3905                 }
3906
3907                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3908                         new_entry = true;
3909                         continue;
3910                 }
3911
3912                 ind_offset += 2;
3913
3914                 /* look for the matching indice */
3915                 for (indices = 0;
3916                         indices < *indices_count;
3917                         indices++) {
3918                         if (unique_indices[indices] ==
3919                                 register_list_format[ind_offset])
3920                                 break;
3921                 }
3922
3923                 if (indices >= *indices_count) {
3924                         unique_indices[*indices_count] =
3925                                 register_list_format[ind_offset];
3926                         indices = *indices_count;
3927                         *indices_count = *indices_count + 1;
3928                         BUG_ON(*indices_count >= max_indices);
3929                 }
3930
3931                 register_list_format[ind_offset] = indices;
3932         }
3933 }
3934
3935 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3936 {
3937         int i, temp, data;
3938         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3939         int indices_count = 0;
3940         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3941         int offset_count = 0;
3942
3943         int list_size;
3944         unsigned int *register_list_format =
3945                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3946         if (!register_list_format)
3947                 return -ENOMEM;
3948         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3949                         adev->gfx.rlc.reg_list_format_size_bytes);
3950
3951         gfx_v8_0_parse_ind_reg_list(register_list_format,
3952                                 RLC_FormatDirectRegListLength,
3953                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3954                                 unique_indices,
3955                                 &indices_count,
3956                                 ARRAY_SIZE(unique_indices),
3957                                 indirect_start_offsets,
3958                                 &offset_count,
3959                                 ARRAY_SIZE(indirect_start_offsets));
3960
3961         /* save and restore list */
3962         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3963
3964         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3965         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3966                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3967
3968         /* indirect list */
3969         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3970         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3971                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3972
3973         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3974         list_size = list_size >> 1;
3975         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3976         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3977
3978         /* starting offsets starts */
3979         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3980                 adev->gfx.rlc.starting_offsets_start);
3981         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3982                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3983                                 indirect_start_offsets[i]);
3984
3985         /* unique indices */
3986         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3987         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3988         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3989                 if (unique_indices[i] != 0) {
3990                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3991                         WREG32(data + i, unique_indices[i] >> 20);
3992                 }
3993         }
3994         kfree(register_list_format);
3995
3996         return 0;
3997 }
3998
/* Enable the RLC save/restore machine (SRM), which saves and restores
 * the register state programmed by gfx_v8_0_init_save_restore_list()
 * across power-gating transitions.
 */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4003
/* Program the RLC power-gating timing parameters: CP idle poll count,
 * the four PG sequencing delays, the SERDES command delay and the GFX
 * idle threshold that triggers automatic power gating.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* all four PG delay fields get the same 0x10 value */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4020
/* Enable/disable SMU clock slowdown while the GFX block is powering up. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4026
/* Enable/disable SMU clock slowdown while the GFX block is powering down. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4032
/* Enable/disable CP power gating.  Note the hardware bit has inverted
 * sense (CP_PG_DISABLE), so enable==true clears it.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4037
4038 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4039 {
4040         if ((adev->asic_type == CHIP_CARRIZO) ||
4041             (adev->asic_type == CHIP_STONEY)) {
4042                 gfx_v8_0_init_csb(adev);
4043                 gfx_v8_0_init_save_restore_list(adev);
4044                 gfx_v8_0_enable_save_restore_machine(adev);
4045                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4046                 gfx_v8_0_init_power_gating(adev);
4047                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4048         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4049                    (adev->asic_type == CHIP_POLARIS12)) {
4050                 gfx_v8_0_init_csb(adev);
4051                 gfx_v8_0_init_save_restore_list(adev);
4052                 gfx_v8_0_enable_save_restore_machine(adev);
4053                 gfx_v8_0_init_power_gating(adev);
4054         }
4055
4056 }
4057
/* Halt the RLC F32 core, mask GUI idle interrupts and wait until the
 * RLC serdes are idle before returning.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4065
/* Pulse the RLC soft reset: assert, settle, deassert, settle. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4074
/* Start the RLC F32 core.  On dGPUs the GUI idle interrupt is enabled
 * here; on APUs it is deferred until after the CP has been initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4085
/**
 * gfx_v8_0_rlc_load_microcode - upload the RLC firmware via MMIO
 *
 * Legacy (non-PSP) load path: streams the RLC GPM ucode image one
 * dword at a time through the RLC_GPM_UCODE_ADDR/DATA register pair,
 * then writes the firmware version into the ADDR register.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware has been loaded.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* ucode payload follows the header at the offset it declares */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
4109
/**
 * gfx_v8_0_rlc_resume - bring the RLC back up
 *
 * Stops the RLC, disables coarse/medium grain clock gating, resets the
 * RLC, re-initializes power gating, (re)loads the RLC microcode when
 * using the legacy direct-load path, and finally restarts the RLC.
 *
 * Returns 0 on success or a negative error code from microcode loading.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		/* Polaris also has a 3D CGCG/CGLS control; clear its
		 * enable bits (low two bits) */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);


	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4148
4149 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4150 {
4151         int i;
4152         u32 tmp = RREG32(mmCP_ME_CNTL);
4153
4154         if (enable) {
4155                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4156                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4157                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4158         } else {
4159                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4160                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4161                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4162                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4163                         adev->gfx.gfx_ring[i].ready = false;
4164         }
4165         WREG32(mmCP_ME_CNTL, tmp);
4166         udelay(50);
4167 }
4168
/**
 * gfx_v8_0_cp_gfx_load_microcode - upload PFP, CE and ME firmware via MMIO
 *
 * Legacy (non-PSP) load path for the three gfx CP micro-engines.  The CP
 * is halted first, then each image is streamed dword by dword through its
 * ADDR/DATA (or RAM_WADDR/RAM_DATA for ME) register pair, and the
 * firmware version is written last.
 *
 * Returns 0 on success, -EINVAL if any of the three images is missing.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* halt the CP before touching its ucode memories */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
4225
4226 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4227 {
4228         u32 count = 0;
4229         const struct cs_section_def *sect = NULL;
4230         const struct cs_extent_def *ext = NULL;
4231
4232         /* begin clear state */
4233         count += 2;
4234         /* context control state */
4235         count += 3;
4236
4237         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4238                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4239                         if (sect->id == SECT_CONTEXT)
4240                                 count += 2 + ext->reg_count;
4241                         else
4242                                 return 0;
4243                 }
4244         }
4245         /* pa_sc_raster_config/pa_sc_raster_config1 */
4246         count += 4;
4247         /* end clear state */
4248         count += 2;
4249         /* clear state */
4250         count += 2;
4251
4252         return count;
4253 }
4254
/**
 * gfx_v8_0_cp_gfx_start - initialize the CP and emit the clear state
 *
 * Programs the basic CP config registers, un-halts the gfx CP, and
 * submits the clear-state PM4 stream (sized by gfx_v8_0_get_csb_size())
 * plus the CE partition setup on gfx ring 0.  The raster config values
 * are per-ASIC.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords for the SET_BASE packet appended after the CSB */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every context-register extent from the VI clear-state table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/* Configure the CP gfx ring doorbell: enable it with the ring's
 * doorbell index when doorbells are in use, otherwise disable it.
 * On dGPUs the doorbell aperture range is additionally programmed.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* APUs do not program the doorbell range registers */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4380
4381 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4382 {
4383         struct amdgpu_ring *ring;
4384         u32 tmp;
4385         u32 rb_bufsz;
4386         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4387         int r;
4388
4389         /* Set the write pointer delay */
4390         WREG32(mmCP_RB_WPTR_DELAY, 0);
4391
4392         /* set the RB to use vmid 0 */
4393         WREG32(mmCP_RB_VMID, 0);
4394
4395         /* Set ring buffer size */
4396         ring = &adev->gfx.gfx_ring[0];
4397         rb_bufsz = order_base_2(ring->ring_size / 8);
4398         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4399         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4400         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4401         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4402 #ifdef __BIG_ENDIAN
4403         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4404 #endif
4405         WREG32(mmCP_RB0_CNTL, tmp);
4406
4407         /* Initialize the ring buffer's read and write pointers */
4408         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4409         ring->wptr = 0;
4410         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4411
4412         /* set the wb address wether it's enabled or not */
4413         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4414         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4415         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4416
4417         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4418         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4419         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4420         mdelay(1);
4421         WREG32(mmCP_RB0_CNTL, tmp);
4422
4423         rb_addr = ring->gpu_addr >> 8;
4424         WREG32(mmCP_RB0_BASE, rb_addr);
4425         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4426
4427         gfx_v8_0_set_cpg_door_bell(adev, ring);
4428         /* start the ring */
4429         amdgpu_ring_clear_ring(ring);
4430         gfx_v8_0_cp_gfx_start(adev);
4431         ring->ready = true;
4432         r = amdgpu_ring_test_ring(ring);
4433         if (r)
4434                 ring->ready = false;
4435
4436         return r;
4437 }
4438
4439 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4440 {
4441         int i;
4442
4443         if (enable) {
4444                 WREG32(mmCP_MEC_CNTL, 0);
4445         } else {
4446                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4447                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4448                         adev->gfx.compute_ring[i].ready = false;
4449                 adev->gfx.kiq.ring.ready = false;
4450         }
4451         udelay(50);
4452 }
4453
/**
 * gfx_v8_0_cp_compute_load_microcode - upload MEC firmware via MMIO
 *
 * Legacy (non-PSP) load path for the compute micro-engines.  MEC1 is
 * always loaded; MEC2 is loaded only when a separate MEC2 image exists.
 *
 * Returns 0 on success, -EINVAL if the MEC1 firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode memories */
	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4499
/* KIQ functions */
/* Tell the RLC which me/pipe/queue is the KIQ.  The queue selection is
 * written first with the enable bit (0x80) clear, then written again
 * with it set -- the two-stage write is intentional.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4514
/**
 * gfx_v8_0_kiq_kcq_enable - map all compute queues through the KIQ
 *
 * Builds the queue mask from the MEC queue bitmap, submits a
 * SET_RESOURCES packet followed by one MAP_QUEUES packet per compute
 * ring on the KIQ, and then polls a scratch register the packet stream
 * writes at the end to confirm the KIQ processed it.
 *
 * Returns 0 on success, -EINVAL on timeout, or a negative error code
 * from scratch/ring allocation.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	/* 8 dwords per MAP_QUEUES + 7 for SET_RESOURCES + 4 for the
	 * scratch write used as a completion fence */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0); /* oac mask */
	amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	/* poll until the KIQ has executed the scratch write, or timeout */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
4600
/**
 * gfx_v8_0_deactivate_hqd - dequeue the currently selected HQD
 * @req: dequeue request type written to CP_HQD_DEQUEUE_REQUEST
 *
 * If the selected hardware queue is active, issues a dequeue request
 * and polls until the queue goes inactive (or the usec timeout hits).
 * The dequeue request and the queue read/write pointers are cleared
 * afterwards in either case.
 *
 * NOTE(review): operates on whatever queue is currently selected via
 * srbm_select; the caller is expected to hold the srbm mutex.
 *
 * Returns 0 on success, -ETIMEDOUT if the queue never went inactive.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4621
/**
 * gfx_v8_0_mqd_init - fill in the memory queue descriptor for a ring
 *
 * Populates the VI MQD (ring->mqd_ptr) that the CP/KIQ uses to set up a
 * compute hardware queue: EOP buffer, MQD and ring base addresses,
 * queue control, writeback addresses, doorbell, and the various
 * CP_HQD_* defaults read back from the current register state.  The
 * descriptor is only written here; committing it to the HQD registers
 * happens elsewhere (gfx_v8_0_mqd_commit / KIQ MAP_QUEUES).
 *
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all SEs for static thread management */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* dynamic CU mask lives in the same allocation as the MQD */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: carry over the current register values */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4764
/* Write a fully initialized MQD image into the currently selected HQD.
 *
 * Caller must hold adev->srbm_mutex and have done vi_srbm_select() for the
 * target me/pipe/queue (see gfx_v8_0_kiq_init_queue()).
 *
 * The HQD registers from mmCP_MQD_BASE_ADDR through mmCP_HQD_ERROR form a
 * contiguous range mirroring the MQD layout, so the image can be written
 * with indexed loops.  Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	/* program the remaining HQD registers after the EOP pointers */
	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD last (mmCP_HQD_ACTIVE lies at the end of this range) */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4801
/* Initialize the KIQ ring's MQD and commit it to its hardware queue.
 *
 * On the SRIOV GPU-reset path the previously backed-up MQD image is
 * restored and the ring buffer is cleared; on first init a fresh MQD is
 * built and a backup copy saved for later resets.  Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* KIQ uses the backup slot just past the compute-ring slots */
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_sriov_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		/* select this queue's me/pipe/queue before touching HQD regs */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		/* start with every bit set in the dynamic CU/RB masks */
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a pristine copy of the MQD for the reset path above */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4840
/* Initialize a compute (KCQ) ring's MQD.
 *
 * Fresh init builds a new MQD and saves a backup copy; the SRIOV reset
 * path restores the backup and clears the ring; resume-from-suspend only
 * clears the ring.  Unlike the KIQ path, the MQD is not committed to the
 * HQD registers here — gfx_v8_0_kiq_resume() later enables the queues
 * through gfx_v8_0_kiq_kcq_enable().  Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot index == position of this ring in compute_ring[] */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_sriov_reset && !adev->gfx.in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		/* start with every bit set in the dynamic CU/RB masks */
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_sriov_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		/* resume from suspend: MQD contents are kept, just clear the ring */
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4871
4872 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4873 {
4874         if (adev->asic_type > CHIP_TONGA) {
4875                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4876                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4877         }
4878         /* enable doorbells */
4879         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4880 }
4881
/* Bring up the KIQ and all compute rings.
 *
 * Sequence: enable the compute CP, init/commit the KIQ MQD, init every
 * KCQ MQD, program the doorbell range, enable the KCQs through the KIQ,
 * then ring-test everything.  Returns 0 on success or an error code.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	/* the KIQ is initialized first; the KCQs are enabled through it below */
	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	/* map the MQD BO so gfx_v8_0_kiq_init_queue() can fill it */
	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	/* same reserve/kmap/init dance for every compute ring's MQD */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	/* enable the compute queues via the KIQ */
	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs.  A failing ring is marked not ready; note that only the
	 * result of the LAST ring's test is propagated to the caller.
	 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
4949
4950 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4951 {
4952         int r;
4953
4954         if (!(adev->flags & AMD_IS_APU))
4955                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4956
4957         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4958                         /* legacy firmware loading */
4959                 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4960                 if (r)
4961                         return r;
4962
4963                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4964                 if (r)
4965                         return r;
4966         }
4967
4968         r = gfx_v8_0_cp_gfx_resume(adev);
4969         if (r)
4970                 return r;
4971
4972         r = gfx_v8_0_kiq_resume(adev);
4973         if (r)
4974                 return r;
4975
4976         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4977
4978         return 0;
4979 }
4980
/* Enable or disable both CP front-ends (gfx and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4986
/*
 * IP-block hw_init: program golden registers, do one-time GPU setup, then
 * bring up the RLC followed by the command processors.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the RLC must be running before the CP is resumed */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5003
/* Ask the KIQ to unmap (RESET_QUEUES) one compute queue and wait for it.
 *
 * Completion is detected by appending a SET_UCONFIG_REG write of
 * 0xDEADBEEF to a scratch register after the UNMAP_QUEUES packet and
 * busy-polling that scratch register.
 *
 * Returns 0 on success, -EINVAL on timeout, or an error from the scratch
 * or ring allocation.
 */
static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t scratch, tmp = 0;
	int r, i;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* sentinel; the KIQ overwrites this with 0xDEADBEEF when done */
	WREG32(scratch, 0xCAFEDEAD);

	/* room for the UNMAP_QUEUES packet plus the scratch write */
	r = amdgpu_ring_alloc(kiq_ring, 10);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}

	/* unmap queues */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
				PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
				PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
				PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
				PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	/* the queue to unmap is identified by its doorbell offset */
	amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	/* poll up to usec_timeout microseconds for the completion marker */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
5054
/* IP-block hw_fini: release interrupts, unmap the compute queues, then
 * (bare-metal only) halt the CP and RLC and ungate gfx powergating.
 * Always returns 0; errors from gfx_v8_0_kcq_disable() are ignored.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);

	/* NOTE(review): SRIOV VFs skip CP/RLC teardown — presumably the host
	 * owns that state; confirm against the virtualization docs.
	 */
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5079
5080 static int gfx_v8_0_suspend(void *handle)
5081 {
5082         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5083         adev->gfx.in_suspend = true;
5084         return gfx_v8_0_hw_fini(adev);
5085 }
5086
5087 static int gfx_v8_0_resume(void *handle)
5088 {
5089         int r;
5090         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5091
5092         r = gfx_v8_0_hw_init(adev);
5093         adev->gfx.in_suspend = false;
5094         return r;
5095 }
5096
5097 static bool gfx_v8_0_is_idle(void *handle)
5098 {
5099         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5100
5101         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5102                 return false;
5103         else
5104                 return true;
5105 }
5106
5107 static int gfx_v8_0_wait_for_idle(void *handle)
5108 {
5109         unsigned i;
5110         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5111
5112         for (i = 0; i < adev->usec_timeout; i++) {
5113                 if (gfx_v8_0_is_idle(handle))
5114                         return 0;
5115
5116                 udelay(1);
5117         }
5118         return -ETIMEDOUT;
5119 }
5120
/* Inspect GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS and record which soft-reset
 * bits would be needed in adev->gfx.grbm_soft_reset and srbm_soft_reset.
 * Returns true when any busy condition calls for a soft reset (the
 * pre/soft/post reset callbacks then consume the recorded masks).
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy gfx pipeline stage means CP+GFX reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC and the three CP micro-engines (F/C/G) */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS: pending GRBM requests and the semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5182
/* Quiesce the hardware before a gfx soft reset: stop the RLC, halt the
 * CP front-ends and deactivate the compute HQDs covered by the pending
 * reset masks recorded by gfx_v8_0_check_soft_reset().  Returns 0.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	/* nothing to do if check_soft_reset() found no hang */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			/* select each queue's HQD and deactivate it
			 * (the literal 2 is the request argument to
			 * gfx_v8_0_deactivate_hqd() — meaning defined there)
			 */
			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
5224
/* Perform the gfx soft reset computed by gfx_v8_0_check_soft_reset():
 * stall/clear via GMCON_DEBUG, pulse the required GRBM/SRBM soft-reset
 * bits (write-set, delay, write-clear, with read-backs to post the
 * writes), then release the stall.  Returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* assert GFX_STALL/GFX_CLEAR for the duration of the reset
	 * (NOTE(review): presumably stalls the memory-controller<->gfx
	 * interface — confirm against the register spec)
	 */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);	/* read back to post the write */

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the stall asserted above */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5286
/* Restart the engines halted in gfx_v8_0_pre_soft_reset() once the soft
 * reset has completed: resume the gfx CP if it was reset, deactivate and
 * re-init the compute queues (via KIQ) if the compute CP was reset, then
 * restart the RLC.  Returns 0.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* make sure every HQD is inactive before re-initializing */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5324
5325 /**
5326  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5327  *
5328  * @adev: amdgpu_device pointer
5329  *
5330  * Fetches a GPU clock counter snapshot.
5331  * Returns the 64 bit clock counter snapshot.
5332  */
5333 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5334 {
5335         uint64_t clock;
5336
5337         mutex_lock(&adev->gfx.gpu_clock_mutex);
5338         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5339         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5340                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5341         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5342         return clock;
5343 }
5344
/* Emit WRITE_DATA packets that program the per-VMID GDS, GWS and OA
 * apertures for @vmid.  Bases and sizes are converted from bytes to the
 * hardware allocation granularity via the AMDGPU_*_SHIFT constants, and
 * the register offsets come from amdgpu_gds_reg_offset[vmid].
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base packed into one register value */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: written as a contiguous bitmask of oa_size bits from oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5392
5393 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5394 {
5395         WREG32(mmSQ_IND_INDEX,
5396                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5397                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5398                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5399                 (SQ_IND_INDEX__FORCE_READ_MASK));
5400         return RREG32(mmSQ_IND_DATA);
5401 }
5402
5403 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5404                            uint32_t wave, uint32_t thread,
5405                            uint32_t regno, uint32_t num, uint32_t *out)
5406 {
5407         WREG32(mmSQ_IND_INDEX,
5408                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5409                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5410                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5411                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5412                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5413                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5414         while (num--)
5415                 *(out++) = RREG32(mmSQ_IND_DATA);
5416 }
5417
/* Snapshot the status registers of one wave into @dst, advancing
 * *no_fields once per dword written.  dst[0] = 0 marks the "type 0"
 * wave-data layout; the remaining entries are the wave registers read
 * through the SQ indirect interface, in the fixed order below.
 */
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
5441
5442 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5443                                      uint32_t wave, uint32_t start,
5444                                      uint32_t size, uint32_t *dst)
5445 {
5446         wave_read_regs(
5447                 adev, simd, wave, 0,
5448                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5449 }
5450
5451
5452 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5453         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5454         .select_se_sh = &gfx_v8_0_select_se_sh,
5455         .read_wave_data = &gfx_v8_0_read_wave_data,
5456         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5457 };
5458
/* IP-block early_init: set ring counts and install the GFX8-specific
 * callback tables before any hardware access happens.  Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5473
/*
 * IP-block late init: enable the privileged register/instruction fault
 * interrupts, run the EDC GPR workaround (needs the IB pool, hence late
 * init), then request GFX power gating.
 *
 * Returns 0 on success or a negative errno from irq/EDC setup.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5497
5498 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5499                                                        bool enable)
5500 {
5501         if ((adev->asic_type == CHIP_POLARIS11) ||
5502             (adev->asic_type == CHIP_POLARIS12))
5503                 /* Send msg to SMU via Powerplay */
5504                 amdgpu_set_powergating_state(adev,
5505                                              AMD_IP_BLOCK_TYPE_SMC,
5506                                              enable ?
5507                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5508
5509         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5510 }
5511
/* Toggle dynamic per-CU medium-grain power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5517
/* Toggle "quick" medium-grain power gating (Polaris11) via RLC_PG_CNTL. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5523
/* Toggle GFX coarse-grain power gating (Carrizo/Stoney) via RLC_PG_CNTL. */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5529
5530 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5531                                                 bool enable)
5532 {
5533         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5534
5535         /* Read any GFX register to wake up GFX. */
5536         if (!enable)
5537                 RREG32(mmDB_RENDER_CONTROL);
5538 }
5539
5540 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5541                                           bool enable)
5542 {
5543         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5544                 cz_enable_gfx_cg_power_gating(adev, true);
5545                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5546                         cz_enable_gfx_pipeline_power_gating(adev, true);
5547         } else {
5548                 cz_enable_gfx_cg_power_gating(adev, false);
5549                 cz_enable_gfx_pipeline_power_gating(adev, false);
5550         }
5551 }
5552
/*
 * IP-block set_powergating_state hook.  Translates the requested PG state
 * into the per-ASIC mix of SCK slow-down, CP, coarse-grain, static-MG,
 * dynamic-MG and (Polaris11/12) quick-MG gating, honouring pg_flags.
 * No-op for SR-IOV VFs and for ASICs not listed.  Always returns 0.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down follows RLC_SMU_HS support, independent of
		 * the requested state */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		/* static/dynamic MG gating need both hw support and a
		 * gate request; otherwise force them off */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}

	return 0;
}
5613
5614 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5615 {
5616         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5617         int data;
5618
5619         if (amdgpu_sriov_vf(adev))
5620                 *flags = 0;
5621
5622         /* AMD_CG_SUPPORT_GFX_MGCG */
5623         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5624         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5625                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5626
5627         /* AMD_CG_SUPPORT_GFX_CGLG */
5628         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5629         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5630                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5631
5632         /* AMD_CG_SUPPORT_GFX_CGLS */
5633         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5634                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5635
5636         /* AMD_CG_SUPPORT_GFX_CGTS */
5637         data = RREG32(mmCGTS_SM_CTRL_REG);
5638         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5639                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5640
5641         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5642         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5643                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5644
5645         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5646         data = RREG32(mmRLC_MEM_SLP_CNTL);
5647         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5648                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5649
5650         /* AMD_CG_SUPPORT_GFX_CP_LS */
5651         data = RREG32(mmCP_MEM_SLP_CNTL);
5652         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5653                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5654 }
5655
/*
 * Broadcast a BPM serdes command @cmd for register @reg_addr to all
 * SE/SH/CU masters: select all SE/SH, open both CU and non-CU master
 * masks, then program RLC_SERDES_WR_CTRL with the command/address fields.
 * Stoney clears a smaller set of control bits than the other ASICs (its
 * mask omits BPM_DATA and REG_ADDR -- those fields are only ORed in).
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* 0xffffffff selects the full broadcast range */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5696
/* RLC safe-mode handshake messages (enter = 1, exit = 0) and the
 * RLC_GPR_REG2 REQ/MESSAGE field layout.
 * NOTE(review): the RLC_GPR_REG2 fields appear unused by the iceland
 * enter/exit functions below (they use RLC_SAFE_MODE); possibly leftover
 * from an older safe-mode path -- confirm before removing. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5703
/*
 * Ask the RLC to enter safe mode before clock-gating registers are
 * touched.  No-op if the RLC is not running, or if neither CGCG nor MGCG
 * is enabled (nothing to protect).  Writes CMD with MESSAGE=1 (enter),
 * then polls -- with a udelay(1) bounded by usec_timeout -- first for the
 * GFX clock and power status bits, then for the CMD bit to self-clear.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		/* MESSAGE = 1 == MSG_ENTER_RLC_SAFE_MODE */
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge by clearing CMD */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5737
/*
 * Counterpart of iceland_enter_rlc_safe_mode: request safe-mode exit
 * (CMD set, MESSAGE = 0 == MSG_EXIT_RLC_SAFE_MODE) if we previously
 * entered it, then poll for the CMD bit to self-clear.  No-op if the RLC
 * is not running.  Note the final poll runs even when no exit request was
 * written.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5762
5763 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5764         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5765         .exit_safe_mode = iceland_exit_rlc_safe_mode
5766 };
5767
/*
 * Enable or disable medium-grain clock gating (MGCG/MGLS and CGTS tree
 * shading).  The whole sequence runs inside RLC safe mode; the numbered
 * step ordering (override writes bracketed by serdes-idle waits and BPM
 * serdes commands) is hardware-mandated -- do not reorder.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit set (not cleared below) */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* LS override cleared only when both MGLS and
			 * CGTS_LS are supported */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5871
/*
 * Enable or disable coarse-grain clock gating (CGCG, and CGLS when
 * supported).  Runs inside RLC safe mode; the override / serdes-command /
 * serdes-idle ordering is hardware-mandated -- do not reorder.  The GUI
 * idle interrupt is toggled around the change and re-enabled at the end
 * of both paths (the disable path needs it back for PG).
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5964 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5965                                             bool enable)
5966 {
5967         if (enable) {
5968                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5969                  * ===  MGCG + MGLS + TS(CG/LS) ===
5970                  */
5971                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5972                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5973         } else {
5974                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5975                  * ===  CGCG + CGLS ===
5976                  */
5977                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5978                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5979         }
5980         return 0;
5981 }
5982
5983 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5984                                           enum amd_clockgating_state state)
5985 {
5986         uint32_t msg_id, pp_state = 0;
5987         uint32_t pp_support_state = 0;
5988
5989         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5990                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5991                         pp_support_state = PP_STATE_SUPPORT_LS;
5992                         pp_state = PP_STATE_LS;
5993                 }
5994                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5995                         pp_support_state |= PP_STATE_SUPPORT_CG;
5996                         pp_state |= PP_STATE_CG;
5997                 }
5998                 if (state == AMD_CG_STATE_UNGATE)
5999                         pp_state = 0;
6000
6001                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6002                                 PP_BLOCK_GFX_CG,
6003                                 pp_support_state,
6004                                 pp_state);
6005                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6006                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6007         }
6008
6009         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6010                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6011                         pp_support_state = PP_STATE_SUPPORT_LS;
6012                         pp_state = PP_STATE_LS;
6013                 }
6014
6015                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6016                         pp_support_state |= PP_STATE_SUPPORT_CG;
6017                         pp_state |= PP_STATE_CG;
6018                 }
6019
6020                 if (state == AMD_CG_STATE_UNGATE)
6021                         pp_state = 0;
6022
6023                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6024                                 PP_BLOCK_GFX_MG,
6025                                 pp_support_state,
6026                                 pp_state);
6027                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6028                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6029         }
6030
6031         return 0;
6032 }
6033
6034 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6035                                           enum amd_clockgating_state state)
6036 {
6037
6038         uint32_t msg_id, pp_state = 0;
6039         uint32_t pp_support_state = 0;
6040
6041         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6042                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6043                         pp_support_state = PP_STATE_SUPPORT_LS;
6044                         pp_state = PP_STATE_LS;
6045                 }
6046                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6047                         pp_support_state |= PP_STATE_SUPPORT_CG;
6048                         pp_state |= PP_STATE_CG;
6049                 }
6050                 if (state == AMD_CG_STATE_UNGATE)
6051                         pp_state = 0;
6052
6053                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6054                                 PP_BLOCK_GFX_CG,
6055                                 pp_support_state,
6056                                 pp_state);
6057                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6058                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6059         }
6060
6061         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6062                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6063                         pp_support_state = PP_STATE_SUPPORT_LS;
6064                         pp_state = PP_STATE_LS;
6065                 }
6066                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6067                         pp_support_state |= PP_STATE_SUPPORT_CG;
6068                         pp_state |= PP_STATE_CG;
6069                 }
6070                 if (state == AMD_CG_STATE_UNGATE)
6071                         pp_state = 0;
6072
6073                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6074                                 PP_BLOCK_GFX_3D,
6075                                 pp_support_state,
6076                                 pp_state);
6077                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6078                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6079         }
6080
6081         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6082                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6083                         pp_support_state = PP_STATE_SUPPORT_LS;
6084                         pp_state = PP_STATE_LS;
6085                 }
6086
6087                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6088                         pp_support_state |= PP_STATE_SUPPORT_CG;
6089                         pp_state |= PP_STATE_CG;
6090                 }
6091
6092                 if (state == AMD_CG_STATE_UNGATE)
6093                         pp_state = 0;
6094
6095                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6096                                 PP_BLOCK_GFX_MG,
6097                                 pp_support_state,
6098                                 pp_state);
6099                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6100                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6101         }
6102
6103         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6104                 pp_support_state = PP_STATE_SUPPORT_LS;
6105
6106                 if (state == AMD_CG_STATE_UNGATE)
6107                         pp_state = 0;
6108                 else
6109                         pp_state = PP_STATE_LS;
6110
6111                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6112                                 PP_BLOCK_GFX_RLC,
6113                                 pp_support_state,
6114                                 pp_state);
6115                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6116                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6117         }
6118
6119         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6120                 pp_support_state = PP_STATE_SUPPORT_LS;
6121
6122                 if (state == AMD_CG_STATE_UNGATE)
6123                         pp_state = 0;
6124                 else
6125                         pp_state = PP_STATE_LS;
6126                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6127                         PP_BLOCK_GFX_CP,
6128                         pp_support_state,
6129                         pp_state);
6130                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6131                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6132         }
6133
6134         return 0;
6135 }
6136
6137 static int gfx_v8_0_set_clockgating_state(void *handle,
6138                                           enum amd_clockgating_state state)
6139 {
6140         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6141
6142         if (amdgpu_sriov_vf(adev))
6143                 return 0;
6144
6145         switch (adev->asic_type) {
6146         case CHIP_FIJI:
6147         case CHIP_CARRIZO:
6148         case CHIP_STONEY:
6149                 gfx_v8_0_update_gfx_clock_gating(adev,
6150                                                  state == AMD_CG_STATE_GATE);
6151                 break;
6152         case CHIP_TONGA:
6153                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6154                 break;
6155         case CHIP_POLARIS10:
6156         case CHIP_POLARIS11:
6157         case CHIP_POLARIS12:
6158                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6159                 break;
6160         default:
6161                 break;
6162         }
6163         return 0;
6164 }
6165
6166 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6167 {
6168         return ring->adev->wb.wb[ring->rptr_offs];
6169 }
6170
6171 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6172 {
6173         struct amdgpu_device *adev = ring->adev;
6174
6175         if (ring->use_doorbell)
6176                 /* XXX check if swapping is necessary on BE */
6177                 return ring->adev->wb.wb[ring->wptr_offs];
6178         else
6179                 return RREG32(mmCP_RB0_WPTR);
6180 }
6181
6182 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6183 {
6184         struct amdgpu_device *adev = ring->adev;
6185
6186         if (ring->use_doorbell) {
6187                 /* XXX check if swapping is necessary on BE */
6188                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6189                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6190         } else {
6191                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6192                 (void)RREG32(mmCP_RB0_WPTR);
6193         }
6194 }
6195
/* Emit a PM4 WAIT_REG_MEM that requests an HDP flush (write to
 * GPU_HDP_FLUSH_REQ) and then polls GPU_HDP_FLUSH_DONE until the
 * CP-specific bit in ref_and_mask is set.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		/* each MEC pipe has its own done bit: base mask (CP2 for MEC1,
		 * CP6 for MEC2) shifted by the pipe index
		 */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return; /* no other MEC exists; nothing to flush */
		}
		reg_mem_engine = 0; /* compute rings wait on the ME engine */
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);  /* value written to REQ */
	amdgpu_ring_write(ring, ref_and_mask);  /* mask compared against DONE */
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6228
/* Emit a VS_PARTIAL_FLUSH followed by a VGT_FLUSH event to drain the
 * geometry pipeline before state that affects it is reprogrammed.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6239
6240
/* Invalidate the HDP read cache by writing 1 to HDP_DEBUG0 from the ring
 * (WRITE_DATA to a register, with write-confirm so later packets observe it).
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0); /* high bits of the destination address */
	amdgpu_ring_write(ring, 1); /* value written to HDP_DEBUG0 */

}
6252
/* Schedule an indirect buffer on the gfx ring.
 *
 * CE IBs use INDIRECT_BUFFER_CONST, DE IBs plain INDIRECT_BUFFER. The
 * control dword carries the IB length and the VM id in bits 31:24. Under
 * SR-IOV, preemptible IBs additionally get PRE_ENB set and (for DE IBs)
 * de-meta data emitted ahead of the IB.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* swap bytes on big-endian hosts */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC)); /* addr must be dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6282
/* Schedule an indirect buffer on a compute ring: a single INDIRECT_BUFFER
 * packet whose control dword carries VALID, the IB length, and the VM id
 * in bits 31:24.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |	/* swap bytes on big-endian hosts */
#endif
				(ib->gpu_addr & 0xFFFFFFFC)); /* dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6298
/* Emit a fence on the gfx ring via EVENT_WRITE_EOP: flush TC/TCL1 caches,
 * write the 64-bit @seq to @addr, and optionally raise an interrupt
 * (AMDGPU_FENCE_FLAG_INT).
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* dword-aligned low bits */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6319
/* Wait on this ring's own fence memory until it reaches sync_seq, i.e.
 * stall the pipeline until all previously submitted work has signalled.
 * Gfx rings wait on the PFP engine, compute rings on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);        /* reference value */
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6336
/* Flush the GPU TLB for @vm_id from the ring, in three steps:
 *  1. program the VM context's page-table base address register,
 *  2. write the per-context bit into VM_INVALIDATE_REQUEST,
 *  3. poll VM_INVALIDATE_REQUEST until the invalidate has completed.
 * Gfx rings finish with a PFP_SYNC_ME so the prefetcher does not run
 * ahead with stale translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12); /* register takes a page frame number */

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6383
6384 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6385 {
6386         return ring->adev->wb.wb[ring->wptr_offs];
6387 }
6388
6389 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6390 {
6391         struct amdgpu_device *adev = ring->adev;
6392
6393         /* XXX check if swapping is necessary on BE */
6394         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6395         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6396 }
6397
/* Raise or lower the wave-launch quota for the pipe behind @ring by
 * programming its SPI_WCL_PIPE_PERCENT_* register: full quota when
 * @acquire, minimal (1) otherwise.
 */
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	/* per-pipe registers are laid out consecutively after the GFX one */
	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}
6416
/* Track which pipes currently hold a high-priority reservation in
 * pipe_reserve_bitmap and adjust every ring's pipe quota accordingly:
 * when no reservations remain everyone gets full quota; otherwise only
 * reserved pipes keep it. Serialized by pipe_reserve_mutex.
 */
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
6466
/* Program the pipe/queue priority registers of @ring's hardware queue
 * descriptor. SRBM must be pointed at the queue first, hence the
 * select/deselect bracket under srbm_mutex.
 */
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	/* restore default SRBM selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
6483 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6484                                                enum amd_sched_priority priority)
6485 {
6486         struct amdgpu_device *adev = ring->adev;
6487         bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW;
6488
6489         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6490                 return;
6491
6492         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6493         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6494 }
6495
/* Emit a fence on a compute ring via RELEASE_MEM: flush TC/TCL1 caches,
 * write the 64-bit @seq to @addr, optionally raising an interrupt.
 * Note the operand order differs from the gfx EVENT_WRITE_EOP variant:
 * DATA_SEL/INT_SEL come before the address here.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6516
/* Emit a fence on the KIQ ring: write the low 32 bits of @seq to @addr
 * with WRITE_DATA, then, if requested, poke CPC_INT_STATUS to raise a
 * GENERIC2 interrupt. 64-bit sequence numbers are not supported because
 * only 32 bits of writeback space are allocated per KIQ fence.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6541
/* Emit a SWITCH_BUFFER packet (payload dword is zero). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6547
/* Emit a CONTEXT_CONTROL packet whose dw2 bitmask selects which state
 * categories the CP should (re)load, depending on whether a context
 * switch and/or a preamble IB is present in this submission.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	/* under SR-IOV, CE metadata must precede the context control */
	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* drain the geometry pipe before switching context state */
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6580
/* Emit a COND_EXEC packet whose dword count is patched in later by
 * gfx_v8_0_ring_emit_patch_cond_exec().
 *
 * Returns the ring offset of the placeholder count dword so the caller
 * can pass it back for patching.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6593
/* Patch the COND_EXEC placeholder at @offset (as returned by
 * gfx_v8_0_ring_emit_init_cond_exec()) with the number of dwords emitted
 * since then, accounting for ring-buffer wrap-around.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa); /* must still be the dummy */

	/* last dword written, modulo ring size */
	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped past the end of the buffer since init */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6607
/* Emit a COPY_DATA packet that copies register @reg into the writeback
 * slot at adev->virt.reg_val_offs, letting the host read registers
 * through the ring (used under virtualization).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	/* wb slots are 32-bit, hence the *4 byte offset */
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6623
/* Emit a WRITE_DATA packet that writes @val into register @reg. */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				  uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0); /* high bits of the destination */
	amdgpu_ring_write(ring, val);
}
6633
6634 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6635                                                  enum amdgpu_interrupt_state state)
6636 {
6637         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6638                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6639 }
6640
/* Enable or disable the end-of-pipe (timestamp) interrupt of one MEC1
 * pipe by toggling TIME_STAMP_INT_ENABLE in its CP_ME1_PIPEn_INT_CNTL.
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		/* pick the interrupt control register for the pipe */
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	/* read-modify-write the enable bit; other states are ignored */
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
6691
6692 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6693                                              struct amdgpu_irq_src *source,
6694                                              unsigned type,
6695                                              enum amdgpu_interrupt_state state)
6696 {
6697         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6698                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6699
6700         return 0;
6701 }
6702
6703 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6704                                               struct amdgpu_irq_src *source,
6705                                               unsigned type,
6706                                               enum amdgpu_interrupt_state state)
6707 {
6708         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6709                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6710
6711         return 0;
6712 }
6713
/* IRQ source callback: route an EOP interrupt enable/disable request to
 * the gfx ring or to the matching (MEC, pipe) pair. Unknown types are
 * silently ignored.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6752
/* EOP interrupt handler: decode me/pipe/queue from the IV ring_id
 * (me = bits 3:2, pipe = bits 1:0, queue = bits 6:4) and run fence
 * processing on the matching ring.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		/* me 0 is the gfx engine; only one gfx ring exists */
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue, so match on all three ids.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6784
/* Handle a privileged-register-access fault: log it and schedule a GPU
 * reset from process context (interrupt context cannot reset directly).
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6793
/* Handle a privileged-instruction fault: log it and schedule a GPU reset
 * from process context.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6802
/* Enable or disable the KIQ's GENERIC2 interrupt, both globally in
 * CPC_INT_CNTL and in the per-pipe CP_MEn_PIPEm_INT_CNTL register that
 * matches the KIQ ring's me/pipe. Only GENERIC2 is supported.
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		/* the PIPEn registers follow PIPE0 at offset ring->pipe */
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
6831
6832 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6833                             struct amdgpu_irq_src *source,
6834                             struct amdgpu_iv_entry *entry)
6835 {
6836         u8 me_id, pipe_id, queue_id;
6837         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6838
6839         me_id = (entry->ring_id & 0x0c) >> 2;
6840         pipe_id = (entry->ring_id & 0x03) >> 0;
6841         queue_id = (entry->ring_id & 0x70) >> 4;
6842         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6843                    me_id, pipe_id, queue_id);
6844
6845         amdgpu_fence_process(ring);
6846         return 0;
6847 }
6848
/* IP-block level callbacks for the GFX v8 block (lifecycle, reset,
 * clockgating/powergating).
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6869
/* Ring callbacks for the GFX ring. emit_frame_size is the worst-case
 * dword budget reserved per frame; the per-item comments below account
 * for each contribution.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
6913
/*
 * Ring callbacks for the compute (MEC) rings.  Shares most emit helpers
 * with the gfx ring but uses compute-specific rptr/wptr accessors, fence
 * emission and runtime priority switching.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
};
6943
/*
 * Ring callbacks for the KIQ (kernel interface queue).  Like a compute
 * ring but with KIQ-specific fence emission and register read/write
 * packets (emit_rreg/emit_wreg), used to access registers via the ring.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6969
6970 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6971 {
6972         int i;
6973
6974         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6975
6976         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6977                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6978
6979         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6980                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6981 }
6982
/* End-of-pipe interrupt: state setter and handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6987
/* Privileged register access fault interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6992
/* Privileged instruction fault interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6997
/* KIQ (kernel interface queue) interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
7002
7003 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7004 {
7005         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7006         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7007
7008         adev->gfx.priv_reg_irq.num_types = 1;
7009         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7010
7011         adev->gfx.priv_inst_irq.num_types = 1;
7012         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7013
7014         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7015         adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7016 }
7017
/* All gfx v8 parts use the same RLC funcs table; the "iceland" name is
 * presumably historical (table defined elsewhere in this file — verify). */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7022
/*
 * Initialize the GDS (global data share), GWS and OA totals and their
 * per-client (gfx vs. compute/CS) partition sizes.  Partitioning depends
 * on whether the part reports 64KB of GDS memory.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
7050
7051 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7052                                                  u32 bitmap)
7053 {
7054         u32 data;
7055
7056         if (!bitmap)
7057                 return;
7058
7059         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7060         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7061
7062         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7063 }
7064
7065 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7066 {
7067         u32 data, mask;
7068
7069         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7070                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7071
7072         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7073
7074         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7075 }
7076
/*
 * Populate adev->gfx.cu_info: per-(SE,SH) active-CU bitmaps, the total
 * active-CU count and the "always on" (AO) CU mask/bitmaps.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];	/* one mask per (SE, SH), up to 4x2 */
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs keep fewer CUs always-on than discrete parts. */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	/* Fill disable_masks from the user-supplied CU disable option. */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* Point GRBM at this (SE, SH) before reading the
			 * per-array CU registers. */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* disable_masks covers only 4 SEs x 2 SHs. */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* Count active CUs; the first ao_cu_num active CUs
			 * in each SH are treated as always-on. */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* ao_cu_mask packs 8 bits per SH, 16 per SE, so only
			 * (SE < 2, SH < 2) fit in the 32-bit mask. */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* Restore GRBM broadcast to all SEs/SHs. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
7127
/* IP block descriptor for gfx v8.0 parts. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7136
/* IP block descriptor for gfx v8.1 parts; shares the v8.0 callbacks. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7145
7146 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7147 {
7148         uint64_t ce_payload_addr;
7149         int cnt_ce;
7150         union {
7151                 struct vi_ce_ib_state regular;
7152                 struct vi_ce_ib_state_chained_ib chained;
7153         } ce_payload = {};
7154
7155         if (ring->adev->virt.chained_ib_support) {
7156                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7157                                                   offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7158                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7159         } else {
7160                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7161                                                   offsetof(struct vi_gfx_meta_data, ce_payload);
7162                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7163         }
7164
7165         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7166         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7167                                 WRITE_DATA_DST_SEL(8) |
7168                                 WR_CONFIRM) |
7169                                 WRITE_DATA_CACHE_POLICY(0));
7170         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7171         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7172         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7173 }
7174
/*
 * Emit a WRITE_DATA packet that initializes the DE metadata in the CSA,
 * including the GDS backup address.  Payload layout depends on chained-IB
 * support under virtualization.
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	/* CSA in the second-to-last reserved VA page; the GDS backup page
	 * immediately follows it. */
	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		/* payload dwords + 4 header dwords - 2 counted by PACKET3 */
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}
This page took 0.468746 seconds and 4 git commands to generate.