]> Git Repo - linux.git/blob - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
[linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "vi.h"
29 #include "vi_structs.h"
30 #include "vid.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
35
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
38
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
41
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
53 #include "ivsrcid/ivsrcid_vislands30.h"
54
/*
 * Basic GFX8 sizing constants and per-ASIC GB_ADDR_CONFIG values.
 *
 * The *_GB_ADDR_CONFIG_GOLDEN values equal the literals written to
 * mmGB_ADDR_CONFIG by the *_golden_common_all tables later in this file
 * (0x22011003 in the Tonga table, 0x22011002 in the Polaris11 table,
 * 0x22010001 in the Iceland and Carrizo tables).
 *
 * NOTE(review): GFX8_MEC_HPD_SIZE is presumably a size in bytes; the unit
 * is not visible in this part of the file - confirm at the use site.
 */
55 #define GFX8_NUM_GFX_RINGS     1
56 #define GFX8_MEC_HPD_SIZE 4096
57
58 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
59 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
60 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
61 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
62
/*
 * Field-placement helpers for tile-mode and macrotile-mode register words.
 *
 * Each macro shifts its argument left by the matching
 * GB_TILE_MODE0__*__SHIFT or GB_MACROTILE_MODE0__*__SHIFT constant.
 * No masking is applied, so the argument must already fit the target
 * field; an oversized value spills into neighbouring bits.
 */
63 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
64 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
65 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
66 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
67 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
68 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
69 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
70 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
71 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
72
/*
 * Single-bit masks (bits 0 through 5) named after fields of
 * RLC_CGTT_MGCG_OVERRIDE: CPF, RLC, MGCG, CGCG, CGLS and GRBM.
 *
 * NOTE(review): presumably defined here because the included register
 * headers do not provide them - confirm before moving or removing.
 */
73 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
74 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
75 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
76 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
77 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
78 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
79
80 /* BPM SERDES CMD */
/*
 * Two command codes: 1 for "set" and 0 for the CLE_ variant.
 * NOTE(review): CLE presumably abbreviates "clear" (the BPM register enum
 * in this file speaks of Set/Clear overrides) - confirm at the use site.
 */
81 #define SET_BPM_SERDES_CMD    1
82 #define CLE_BPM_SERDES_CMD    0
83
84 /* BPM Register Address */
/*
 * Enumerators are consecutive starting at 0 (BPM_REG_CGLS_EN), so
 * BPM_REG_FGCG_MAX evaluates to 5 and equals the number of entries
 * listed before it.
 */
85 enum {
86         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
87         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
88         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
89         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
90         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
91         BPM_REG_FGCG_MAX
92 };
93
/*
 * Length constant (14) associated with the RLC "format direct" register list.
 * NOTE(review): whether this counts dwords or list entries is not visible
 * in this part of the file - confirm where it is used.
 */
94 #define RLC_FormatDirectRegListLength        14
95
/*
 * Firmware image names declared with MODULE_FIRMWARE, grouped per ASIC.
 *
 * Every group lists ce, pfp, me, mec and rlc images.  All groups except
 * stoney and topaz also list a mec2 image.  The polaris10, polaris11 and
 * polaris12 groups additionally list a "_2" variant of ce, pfp, me, mec
 * and mec2 (there is no rlc "_2" variant).
 */
96 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
102
103 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
107 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
108
109 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
115
116 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
120 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
121
122 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
128
129 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
130 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
131 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140
141 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
142 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
143 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
144 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
145 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
152
153 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
154 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
155 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
156 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
164
165 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
166 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
167 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
168 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
169 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
171
/*
 * Per-VMID GDS register offsets: sixteen entries, one for each of VMID0
 * through VMID15, in ascending order.  Each initializer supplies, in this
 * order, the VMIDn BASE, SIZE, GWS and OA register offsets.
 *
 * The array is initialized positionally; the member layout comes from
 * struct amdgpu_gds_reg_offset, which is declared outside this file view.
 */
172 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
173 {
174         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
175         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
176         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
177         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
178         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
179         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
180         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
181         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
182         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
183         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
184         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
185         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
186         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
187         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
188         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
189         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
190 };
191
/*
 * Tonga "golden settings" table: a flat u32 list laid out three values
 * per row - a register offset (mm*) followed by two 32-bit constants.
 *
 * NOTE(review): the two constants are presumably an AND mask and the value
 * to program under it; the consumer is outside this view - confirm.
 * NOTE(review): the mmRLC_CGCG_CGLS_CTRL row has value bits (0x3c) outside
 * its first constant (0x3); whether they take effect depends on how the
 * consumer combines the two - confirm.
 */
192 static const u32 golden_settings_tonga_a11[] =
193 {
194         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
195         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
196         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
197         mmGB_GPU_ID, 0x0000000f, 0x00000000,
198         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
199         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
200         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
201         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
202         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
203         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
204         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
205         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
206         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
207         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
208         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
209         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
210 };
211
/*
 * Tonga common register table: three u32 values per row - a register
 * offset followed by two 32-bit constants.  Every row here uses 0xffffffff
 * as its first constant.  The mmGB_ADDR_CONFIG value (0x22011003) equals
 * TONGA_GB_ADDR_CONFIG_GOLDEN.
 *
 * NOTE(review): the constants are presumably (mask, value); the consumer
 * is outside this view - confirm.
 */
212 static const u32 tonga_golden_common_all[] =
213 {
214         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
215         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
216         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
217         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
218         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
219         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
220         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
221         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
222 };
223
/*
 * Tonga clock-gating init table (MGCG/CGCG per the name): three u32 values
 * per row - a register offset followed by two 32-bit constants.
 *
 * Layout of the rows as written:
 *   - mmRLC_CGTT_MGCG_OVERRIDE, then mmGRBM_GFX_INDEX = 0xe0000000;
 *   - the CGTT / *_CGTT_* clock control registers;
 *   - mmGRBM_GFX_INDEX = 0xe0000000 a second time;
 *   - five mmCGTS_CUn_* rows for each of CU0 through CU7;
 *   - mmCGTS_SM_CTRL_REG, mmCP_RB_WPTR_POLL_CNTL, mmRLC_CGCG_CGLS_CTRL
 *     and mmCP_MEM_SLP_CNTL.
 *
 * NOTE(review): the constants are presumably (mask, value) and row order
 * presumably matters to the consumer; neither is visible here - confirm.
 */
224 static const u32 tonga_mgcg_cgcg_init[] =
225 {
226         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
227         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
228         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
229         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
230         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
231         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
232         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
233         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
234         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
235         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
236         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
237         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
238         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
239         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
240         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
241         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
242         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
244         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
245         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
246         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
247         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
248         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
249         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
250         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
251         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
252         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
253         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
254         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
255         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
256         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
257         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
258         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
259         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
260         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
261         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
262         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
263         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
264         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
265         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
266         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
267         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
268         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
269         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
270         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
271         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
272         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
273         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
274         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
275         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
276         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
277         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
278         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
279         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
280         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
281         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
282         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
283         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
284         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
285         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
286         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
287         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
288         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
289         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
290         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
291         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
292         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
293         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
294         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
295         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
296         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
297         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
298         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
299         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
300         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
301 };
302
/*
 * VegaM "golden settings" table: three u32 values per row - a register
 * offset followed by two 32-bit constants.
 *
 * NOTE(review): the constants are presumably (mask, value); the consumer
 * is outside this view - confirm.  The mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL
 * rows carry value bits outside their first constant.
 */
303 static const u32 golden_settings_vegam_a11[] =
304 {
305         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
306         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
307         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
308         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
309         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
310         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
311         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
312         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
313         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
314         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
315         mmSQ_CONFIG, 0x07f80000, 0x01180000,
316         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
317         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
318         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
319         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
320         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
321         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
322 };
323
/*
 * VegaM common register table: three u32 values per row - a register
 * offset followed by two 32-bit constants, the first always 0xffffffff.
 * Unlike the Tonga, Polaris10 and Fiji common tables in this file, it has
 * no mmPA_SC_RASTER_CONFIG / mmPA_SC_RASTER_CONFIG_1 rows.
 *
 * NOTE(review): the constants are presumably (mask, value) - confirm
 * against the consumer, which is outside this view.
 */
324 static const u32 vegam_golden_common_all[] =
325 {
326         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
327         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
328         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
329         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
330         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
331         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
332 };
333
/*
 * Polaris11 "golden settings" table: three u32 values per row - a register
 * offset followed by two 32-bit constants.
 *
 * NOTE(review): the constants are presumably (mask, value); the consumer
 * is outside this view - confirm.  The mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL
 * rows carry value bits outside their first constant.
 */
334 static const u32 golden_settings_polaris11_a11[] =
335 {
336         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
337         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
338         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
339         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
340         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
341         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
342         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
343         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
344         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
345         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
346         mmSQ_CONFIG, 0x07f80000, 0x01180000,
347         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
348         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
349         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
350         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
351         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
352         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
353 };
354
/*
 * Polaris11 common register table: three u32 values per row - a register
 * offset followed by two 32-bit constants, the first always 0xffffffff.
 * The mmGB_ADDR_CONFIG value (0x22011002) equals
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN.
 *
 * NOTE(review): the constants are presumably (mask, value) - confirm
 * against the consumer, which is outside this view.
 */
355 static const u32 polaris11_golden_common_all[] =
356 {
357         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
358         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
359         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
360         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
361         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
362         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
363 };
364
/*
 * Polaris10 "golden settings" table: three u32 values per row - a register
 * offset followed by two 32-bit constants.  It is the only golden-settings
 * table in this part of the file with an mmATC_MISC_CG row, and it lists
 * mmTCP_CHAN_STEER_HI without a matching mmTCP_CHAN_STEER_LO row.
 *
 * NOTE(review): the constants are presumably (mask, value); the consumer
 * is outside this view - confirm.
 */
365 static const u32 golden_settings_polaris10_a11[] =
366 {
367         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
368         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
369         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
370         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
371         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
372         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
373         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
374         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
375         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
376         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
377         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
378         mmSQ_CONFIG, 0x07f80000, 0x07180000,
379         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
380         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
381         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
382         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
383         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
384 };
385
/*
 * Polaris10 common register table: three u32 values per row - a register
 * offset followed by two 32-bit constants, the first always 0xffffffff.
 * The rows and values are the same as in tonga_golden_common_all.
 *
 * NOTE(review): the constants are presumably (mask, value) - confirm
 * against the consumer, which is outside this view.
 */
386 static const u32 polaris10_golden_common_all[] =
387 {
388         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
389         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
390         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
391         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
392         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
393         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
394         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
395         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
396 };
397
/*
 * Fiji common register table: three u32 values per row - a register
 * offset followed by two 32-bit constants.  After the usual common rows
 * it writes mmGRBM_GFX_INDEX = 0xe0000000 a second time and then
 * mmSPI_CONFIG_CNTL_1, the only row here whose first constant (0x0000000f)
 * is not 0xffffffff.
 *
 * NOTE(review): the constants are presumably (mask, value) - confirm
 * against the consumer, which is outside this view.
 */
398 static const u32 fiji_golden_common_all[] =
399 {
400         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
401         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
402         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
403         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
404         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
405         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
406         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
407         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
408         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
409         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
410 };
411
/*
 * Fiji "golden settings" table (a10 per the name): three u32 values per
 * row - a register offset followed by two 32-bit constants.
 *
 * NOTE(review): the constants are presumably (mask, value); the consumer
 * is outside this view - confirm.  The mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL
 * rows carry value bits outside their first constant.
 */
412 static const u32 golden_settings_fiji_a10[] =
413 {
414         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
415         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
416         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
417         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
418         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
419         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
420         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
421         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
422         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
423         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
424         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
425 };
426
/*
 * Fiji clock-gating init table (MGCG/CGCG per the name): three u32 values
 * per row - a register offset followed by two 32-bit constants.
 *
 * It has the same leading CGTT rows as tonga_mgcg_cgcg_init but no
 * per-CU mmCGTS_CUn_* rows: after the second mmGRBM_GFX_INDEX row it goes
 * straight to mmCGTS_SM_CTRL_REG.
 *
 * NOTE(review): the constants are presumably (mask, value) and row order
 * presumably matters to the consumer; neither is visible here - confirm.
 */
427 static const u32 fiji_mgcg_cgcg_init[] =
428 {
429         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
430         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
431         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
432         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
433         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
434         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
435         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
436         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
438         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
440         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
447         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
448         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
449         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
451         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
452         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
454         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
455         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
456         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
457         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
459         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
460         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
461         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
462         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
463         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
464 };
465
/*
 * Iceland "golden settings" table: three u32 values per row - a register
 * offset followed by two 32-bit constants.  It is the only golden-settings
 * table in this part of the file with an mmDB_DEBUG3 row.
 *
 * NOTE(review): the constants are presumably (mask, value); the consumer
 * is outside this view - confirm.  The mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL
 * rows carry value bits outside their first constant.
 */
466 static const u32 golden_settings_iceland_a11[] =
467 {
468         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
469         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
470         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
471         mmGB_GPU_ID, 0x0000000f, 0x00000000,
472         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
473         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
474         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
475         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
476         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
477         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
478         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
479         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
480         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
481         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
482         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
483         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
484 };
485
/*
 * Iceland common register table: three u32 values per row - a register
 * offset followed by two 32-bit constants, the first always 0xffffffff.
 * The mmGB_ADDR_CONFIG value (0x22010001) is the same number as
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 *
 * NOTE(review): the constants are presumably (mask, value) - confirm
 * against the consumer, which is outside this view.
 */
486 static const u32 iceland_golden_common_all[] =
487 {
488         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
489         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
490         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
491         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
492         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
493         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
494         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
495         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
496 };
497
/*
 * Iceland clock-gating init table (MGCG/CGCG per the name): three u32
 * values per row - a register offset followed by two 32-bit constants.
 *
 * Differences from tonga_mgcg_cgcg_init visible in the data:
 *   - mmCGTT_CP/CPC/CPF_CLK_CTRL use 0xc0000100 and mmCGTT_TCI_CLK_CTRL
 *     uses 0xff000100;
 *   - per-CU mmCGTS_CUn_* rows cover CU0 through CU5 only;
 *   - the CU0 and CU4 TA_SQC rows use 0x0f840f87;
 *   - there is no trailing mmCP_MEM_SLP_CNTL row.
 *
 * NOTE(review): the constants are presumably (mask, value) and row order
 * presumably matters to the consumer; neither is visible here - confirm.
 */
498 static const u32 iceland_mgcg_cgcg_init[] =
499 {
500         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
501         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
502         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
503         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
504         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
505         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
506         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
507         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
508         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
509         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
510         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
511         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
512         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
513         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
514         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
515         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
516         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
518         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
519         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
520         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
521         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
522         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
523         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
524         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
525         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
526         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
527         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
528         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
529         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
530         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
531         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
532         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
533         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
534         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
535         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
536         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
537         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
538         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
539         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
540         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
541         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
542         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
543         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
544         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
545         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
546         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
547         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
548         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
549         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
550         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
551         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
552         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
553         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
554         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
555         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
556         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
557         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
558         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
559         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
560         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
561         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
562         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
563         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
564 };
565
/*
 * Carrizo ("cz") golden settings table: three u32 values per row - a
 * register offset followed by two 32-bit constants.
 *
 * NOTE(review): the constants are presumably (mask, value); the consumer
 * is outside this view - confirm.
 * NOTE(review): the mmTCP_ADDR_CONFIG row pairs 0x0000000f with 0x000000f3,
 * while the other golden-settings tables in this file use 0x000003ff as the
 * first constant for that register.  If the first constant is a mask, bits
 * 7:4 of the value fall outside it - confirm this is intended.
 */
566 static const u32 cz_golden_settings_a11[] =
567 {
568         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
569         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
570         mmGB_GPU_ID, 0x0000000f, 0x00000000,
571         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
572         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
573         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
574         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
575         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
576         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
577         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
578         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
579         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
580 };
581
/*
 * Carrizo ("cz") common register table: three u32 values per row - a
 * register offset followed by two 32-bit constants, the first always
 * 0xffffffff.  The rows and values are the same as in
 * iceland_golden_common_all; the mmGB_ADDR_CONFIG value (0x22010001)
 * equals CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 *
 * NOTE(review): the constants are presumably (mask, value) - confirm
 * against the consumer, which is outside this view.
 */
582 static const u32 cz_golden_common_all[] =
583 {
584         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
585         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
586         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
587         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
588         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
589         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
590         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
591         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
592 };
593
594 static const u32 cz_mgcg_cgcg_init[] =
595 {
596         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
597         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
598         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
599         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
600         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
601         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
602         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
605         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
606         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
607         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
614         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
615         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
616         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
617         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
618         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
619         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
620         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
621         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
622         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
623         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
624         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
625         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
626         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
627         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
628         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
629         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
630         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
631         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
632         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
633         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
634         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
635         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
636         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
637         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
638         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
639         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
640         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
641         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
642         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
643         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
644         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
645         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
646         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
647         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
648         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
649         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
650         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
651         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
652         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
653         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
654         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
655         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
656         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
657         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
658         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
659         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
660         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
661         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
662         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
663         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
664         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
665         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
666         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
667         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
668         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
669         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
670         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
671 };
672
673 static const u32 stoney_golden_settings_a11[] =
674 {
675         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
676         mmGB_GPU_ID, 0x0000000f, 0x00000000,
677         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
678         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
679         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
680         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
681         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
682         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
683         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
684         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
685 };
686
687 static const u32 stoney_golden_common_all[] =
688 {
689         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
690         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
691         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
692         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
693         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
694         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
695         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
696         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
697 };
698
699 static const u32 stoney_mgcg_cgcg_init[] =
700 {
701         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
702         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
703         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
704         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
705         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
706 };
707
708
/*
 * Human-readable descriptions of SQ EDC error sources.
 *
 * NOTE(review): the array index presumably corresponds to the hardware
 * SQ_EDC_INFO source code -- confirm against the user of this table.
 */
static const char * const sq_edc_source_names[] = {
	[0] = "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	[1] = "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	[2] = "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	[3] = "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	[4] = "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	[5] = "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	[6] = "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
718
719 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
720 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
721 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
722 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
723 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
724 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
725 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
726 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
727
728 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
729 {
730         switch (adev->asic_type) {
731         case CHIP_TOPAZ:
732                 amdgpu_device_program_register_sequence(adev,
733                                                         iceland_mgcg_cgcg_init,
734                                                         ARRAY_SIZE(iceland_mgcg_cgcg_init));
735                 amdgpu_device_program_register_sequence(adev,
736                                                         golden_settings_iceland_a11,
737                                                         ARRAY_SIZE(golden_settings_iceland_a11));
738                 amdgpu_device_program_register_sequence(adev,
739                                                         iceland_golden_common_all,
740                                                         ARRAY_SIZE(iceland_golden_common_all));
741                 break;
742         case CHIP_FIJI:
743                 amdgpu_device_program_register_sequence(adev,
744                                                         fiji_mgcg_cgcg_init,
745                                                         ARRAY_SIZE(fiji_mgcg_cgcg_init));
746                 amdgpu_device_program_register_sequence(adev,
747                                                         golden_settings_fiji_a10,
748                                                         ARRAY_SIZE(golden_settings_fiji_a10));
749                 amdgpu_device_program_register_sequence(adev,
750                                                         fiji_golden_common_all,
751                                                         ARRAY_SIZE(fiji_golden_common_all));
752                 break;
753
754         case CHIP_TONGA:
755                 amdgpu_device_program_register_sequence(adev,
756                                                         tonga_mgcg_cgcg_init,
757                                                         ARRAY_SIZE(tonga_mgcg_cgcg_init));
758                 amdgpu_device_program_register_sequence(adev,
759                                                         golden_settings_tonga_a11,
760                                                         ARRAY_SIZE(golden_settings_tonga_a11));
761                 amdgpu_device_program_register_sequence(adev,
762                                                         tonga_golden_common_all,
763                                                         ARRAY_SIZE(tonga_golden_common_all));
764                 break;
765         case CHIP_VEGAM:
766                 amdgpu_device_program_register_sequence(adev,
767                                                         golden_settings_vegam_a11,
768                                                         ARRAY_SIZE(golden_settings_vegam_a11));
769                 amdgpu_device_program_register_sequence(adev,
770                                                         vegam_golden_common_all,
771                                                         ARRAY_SIZE(vegam_golden_common_all));
772                 break;
773         case CHIP_POLARIS11:
774         case CHIP_POLARIS12:
775                 amdgpu_device_program_register_sequence(adev,
776                                                         golden_settings_polaris11_a11,
777                                                         ARRAY_SIZE(golden_settings_polaris11_a11));
778                 amdgpu_device_program_register_sequence(adev,
779                                                         polaris11_golden_common_all,
780                                                         ARRAY_SIZE(polaris11_golden_common_all));
781                 break;
782         case CHIP_POLARIS10:
783                 amdgpu_device_program_register_sequence(adev,
784                                                         golden_settings_polaris10_a11,
785                                                         ARRAY_SIZE(golden_settings_polaris10_a11));
786                 amdgpu_device_program_register_sequence(adev,
787                                                         polaris10_golden_common_all,
788                                                         ARRAY_SIZE(polaris10_golden_common_all));
789                 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
790                 if (adev->pdev->revision == 0xc7 &&
791                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
792                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
793                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
794                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
795                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
796                 }
797                 break;
798         case CHIP_CARRIZO:
799                 amdgpu_device_program_register_sequence(adev,
800                                                         cz_mgcg_cgcg_init,
801                                                         ARRAY_SIZE(cz_mgcg_cgcg_init));
802                 amdgpu_device_program_register_sequence(adev,
803                                                         cz_golden_settings_a11,
804                                                         ARRAY_SIZE(cz_golden_settings_a11));
805                 amdgpu_device_program_register_sequence(adev,
806                                                         cz_golden_common_all,
807                                                         ARRAY_SIZE(cz_golden_common_all));
808                 break;
809         case CHIP_STONEY:
810                 amdgpu_device_program_register_sequence(adev,
811                                                         stoney_mgcg_cgcg_init,
812                                                         ARRAY_SIZE(stoney_mgcg_cgcg_init));
813                 amdgpu_device_program_register_sequence(adev,
814                                                         stoney_golden_settings_a11,
815                                                         ARRAY_SIZE(stoney_golden_settings_a11));
816                 amdgpu_device_program_register_sequence(adev,
817                                                         stoney_golden_common_all,
818                                                         ARRAY_SIZE(stoney_golden_common_all));
819                 break;
820         default:
821                 break;
822         }
823 }
824
825 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
826 {
827         adev->gfx.scratch.num_reg = 8;
828         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
829         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
830 }
831
832 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
833 {
834         struct amdgpu_device *adev = ring->adev;
835         uint32_t scratch;
836         uint32_t tmp = 0;
837         unsigned i;
838         int r;
839
840         r = amdgpu_gfx_scratch_get(adev, &scratch);
841         if (r)
842                 return r;
843
844         WREG32(scratch, 0xCAFEDEAD);
845         r = amdgpu_ring_alloc(ring, 3);
846         if (r)
847                 goto error_free_scratch;
848
849         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
850         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
851         amdgpu_ring_write(ring, 0xDEADBEEF);
852         amdgpu_ring_commit(ring);
853
854         for (i = 0; i < adev->usec_timeout; i++) {
855                 tmp = RREG32(scratch);
856                 if (tmp == 0xDEADBEEF)
857                         break;
858                 DRM_UDELAY(1);
859         }
860
861         if (i >= adev->usec_timeout)
862                 r = -ETIMEDOUT;
863
864 error_free_scratch:
865         amdgpu_gfx_scratch_free(adev, scratch);
866         return r;
867 }
868
869 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
870 {
871         struct amdgpu_device *adev = ring->adev;
872         struct amdgpu_ib ib;
873         struct dma_fence *f = NULL;
874
875         unsigned int index;
876         uint64_t gpu_addr;
877         uint32_t tmp;
878         long r;
879
880         r = amdgpu_device_wb_get(adev, &index);
881         if (r)
882                 return r;
883
884         gpu_addr = adev->wb.gpu_addr + (index * 4);
885         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
886         memset(&ib, 0, sizeof(ib));
887         r = amdgpu_ib_get(adev, NULL, 16, &ib);
888         if (r)
889                 goto err1;
890
891         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
892         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
893         ib.ptr[2] = lower_32_bits(gpu_addr);
894         ib.ptr[3] = upper_32_bits(gpu_addr);
895         ib.ptr[4] = 0xDEADBEEF;
896         ib.length_dw = 5;
897
898         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
899         if (r)
900                 goto err2;
901
902         r = dma_fence_wait_timeout(f, false, timeout);
903         if (r == 0) {
904                 r = -ETIMEDOUT;
905                 goto err2;
906         } else if (r < 0) {
907                 goto err2;
908         }
909
910         tmp = adev->wb.wb[index];
911         if (tmp == 0xDEADBEEF)
912                 r = 0;
913         else
914                 r = -EINVAL;
915
916 err2:
917         amdgpu_ib_free(adev, &ib, NULL);
918         dma_fence_put(f);
919 err1:
920         amdgpu_device_wb_free(adev, index);
921         return r;
922 }
923
924
925 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
926 {
927         release_firmware(adev->gfx.pfp_fw);
928         adev->gfx.pfp_fw = NULL;
929         release_firmware(adev->gfx.me_fw);
930         adev->gfx.me_fw = NULL;
931         release_firmware(adev->gfx.ce_fw);
932         adev->gfx.ce_fw = NULL;
933         release_firmware(adev->gfx.rlc_fw);
934         adev->gfx.rlc_fw = NULL;
935         release_firmware(adev->gfx.mec_fw);
936         adev->gfx.mec_fw = NULL;
937         if ((adev->asic_type != CHIP_STONEY) &&
938             (adev->asic_type != CHIP_TOPAZ))
939                 release_firmware(adev->gfx.mec2_fw);
940         adev->gfx.mec2_fw = NULL;
941
942         kfree(adev->gfx.rlc.register_list_format);
943 }
944
945 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
946 {
947         const char *chip_name;
948         char fw_name[30];
949         int err;
950         struct amdgpu_firmware_info *info = NULL;
951         const struct common_firmware_header *header = NULL;
952         const struct gfx_firmware_header_v1_0 *cp_hdr;
953         const struct rlc_firmware_header_v2_0 *rlc_hdr;
954         unsigned int *tmp = NULL, i;
955
956         DRM_DEBUG("\n");
957
958         switch (adev->asic_type) {
959         case CHIP_TOPAZ:
960                 chip_name = "topaz";
961                 break;
962         case CHIP_TONGA:
963                 chip_name = "tonga";
964                 break;
965         case CHIP_CARRIZO:
966                 chip_name = "carrizo";
967                 break;
968         case CHIP_FIJI:
969                 chip_name = "fiji";
970                 break;
971         case CHIP_STONEY:
972                 chip_name = "stoney";
973                 break;
974         case CHIP_POLARIS10:
975                 chip_name = "polaris10";
976                 break;
977         case CHIP_POLARIS11:
978                 chip_name = "polaris11";
979                 break;
980         case CHIP_POLARIS12:
981                 chip_name = "polaris12";
982                 break;
983         case CHIP_VEGAM:
984                 chip_name = "vegam";
985                 break;
986         default:
987                 BUG();
988         }
989
990         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
991                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
992                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
993                 if (err == -ENOENT) {
994                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
995                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
996                 }
997         } else {
998                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
999                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1000         }
1001         if (err)
1002                 goto out;
1003         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1004         if (err)
1005                 goto out;
1006         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1007         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1008         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1009
1010         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1011                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1012                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1013                 if (err == -ENOENT) {
1014                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1015                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1016                 }
1017         } else {
1018                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1019                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1020         }
1021         if (err)
1022                 goto out;
1023         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1024         if (err)
1025                 goto out;
1026         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1027         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1028
1029         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1030
1031         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1032                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1033                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1034                 if (err == -ENOENT) {
1035                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1036                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1037                 }
1038         } else {
1039                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1040                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1041         }
1042         if (err)
1043                 goto out;
1044         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1045         if (err)
1046                 goto out;
1047         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1048         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1049         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1050
1051         /*
1052          * Support for MCBP/Virtualization in combination with chained IBs is
1053          * formal released on feature version #46
1054          */
1055         if (adev->gfx.ce_feature_version >= 46 &&
1056             adev->gfx.pfp_feature_version >= 46) {
1057                 adev->virt.chained_ib_support = true;
1058                 DRM_INFO("Chained IB support enabled!\n");
1059         } else
1060                 adev->virt.chained_ib_support = false;
1061
1062         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1063         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1064         if (err)
1065                 goto out;
1066         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1067         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1068         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1069         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1070
1071         adev->gfx.rlc.save_and_restore_offset =
1072                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1073         adev->gfx.rlc.clear_state_descriptor_offset =
1074                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1075         adev->gfx.rlc.avail_scratch_ram_locations =
1076                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1077         adev->gfx.rlc.reg_restore_list_size =
1078                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1079         adev->gfx.rlc.reg_list_format_start =
1080                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1081         adev->gfx.rlc.reg_list_format_separate_start =
1082                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1083         adev->gfx.rlc.starting_offsets_start =
1084                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1085         adev->gfx.rlc.reg_list_format_size_bytes =
1086                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1087         adev->gfx.rlc.reg_list_size_bytes =
1088                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1089
1090         adev->gfx.rlc.register_list_format =
1091                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1092                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1093
1094         if (!adev->gfx.rlc.register_list_format) {
1095                 err = -ENOMEM;
1096                 goto out;
1097         }
1098
1099         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1100                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1101         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1102                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1103
1104         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1105
1106         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1107                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1108         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1109                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1110
1111         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1112                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1113                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1114                 if (err == -ENOENT) {
1115                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1116                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1117                 }
1118         } else {
1119                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1120                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1121         }
1122         if (err)
1123                 goto out;
1124         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1125         if (err)
1126                 goto out;
1127         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1128         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1129         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1130
1131         if ((adev->asic_type != CHIP_STONEY) &&
1132             (adev->asic_type != CHIP_TOPAZ)) {
1133                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1134                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1135                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1136                         if (err == -ENOENT) {
1137                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1138                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1139                         }
1140                 } else {
1141                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1142                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1143                 }
1144                 if (!err) {
1145                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1146                         if (err)
1147                                 goto out;
1148                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1149                                 adev->gfx.mec2_fw->data;
1150                         adev->gfx.mec2_fw_version =
1151                                 le32_to_cpu(cp_hdr->header.ucode_version);
1152                         adev->gfx.mec2_feature_version =
1153                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1154                 } else {
1155                         err = 0;
1156                         adev->gfx.mec2_fw = NULL;
1157                 }
1158         }
1159
1160         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1161         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1162         info->fw = adev->gfx.pfp_fw;
1163         header = (const struct common_firmware_header *)info->fw->data;
1164         adev->firmware.fw_size +=
1165                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1166
1167         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1168         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1169         info->fw = adev->gfx.me_fw;
1170         header = (const struct common_firmware_header *)info->fw->data;
1171         adev->firmware.fw_size +=
1172                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1173
1174         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1175         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1176         info->fw = adev->gfx.ce_fw;
1177         header = (const struct common_firmware_header *)info->fw->data;
1178         adev->firmware.fw_size +=
1179                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1180
1181         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1182         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1183         info->fw = adev->gfx.rlc_fw;
1184         header = (const struct common_firmware_header *)info->fw->data;
1185         adev->firmware.fw_size +=
1186                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1187
1188         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1189         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1190         info->fw = adev->gfx.mec_fw;
1191         header = (const struct common_firmware_header *)info->fw->data;
1192         adev->firmware.fw_size +=
1193                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1194
1195         /* we need account JT in */
1196         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1197         adev->firmware.fw_size +=
1198                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1199
1200         if (amdgpu_sriov_vf(adev)) {
1201                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1202                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1203                 info->fw = adev->gfx.mec_fw;
1204                 adev->firmware.fw_size +=
1205                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1206         }
1207
1208         if (adev->gfx.mec2_fw) {
1209                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1210                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1211                 info->fw = adev->gfx.mec2_fw;
1212                 header = (const struct common_firmware_header *)info->fw->data;
1213                 adev->firmware.fw_size +=
1214                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1215         }
1216
1217 out:
1218         if (err) {
1219                 dev_err(adev->dev,
1220                         "gfx8: Failed to load firmware \"%s\"\n",
1221                         fw_name);
1222                 release_firmware(adev->gfx.pfp_fw);
1223                 adev->gfx.pfp_fw = NULL;
1224                 release_firmware(adev->gfx.me_fw);
1225                 adev->gfx.me_fw = NULL;
1226                 release_firmware(adev->gfx.ce_fw);
1227                 adev->gfx.ce_fw = NULL;
1228                 release_firmware(adev->gfx.rlc_fw);
1229                 adev->gfx.rlc_fw = NULL;
1230                 release_firmware(adev->gfx.mec_fw);
1231                 adev->gfx.mec_fw = NULL;
1232                 release_firmware(adev->gfx.mec2_fw);
1233                 adev->gfx.mec2_fw = NULL;
1234         }
1235         return err;
1236 }
1237
1238 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1239                                     volatile u32 *buffer)
1240 {
1241         u32 count = 0, i;
1242         const struct cs_section_def *sect = NULL;
1243         const struct cs_extent_def *ext = NULL;
1244
1245         if (adev->gfx.rlc.cs_data == NULL)
1246                 return;
1247         if (buffer == NULL)
1248                 return;
1249
1250         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1251         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1252
1253         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1254         buffer[count++] = cpu_to_le32(0x80000000);
1255         buffer[count++] = cpu_to_le32(0x80000000);
1256
1257         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1258                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1259                         if (sect->id == SECT_CONTEXT) {
1260                                 buffer[count++] =
1261                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1262                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1263                                                 PACKET3_SET_CONTEXT_REG_START);
1264                                 for (i = 0; i < ext->reg_count; i++)
1265                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1266                         } else {
1267                                 return;
1268                         }
1269                 }
1270         }
1271
1272         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1273         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1274                         PACKET3_SET_CONTEXT_REG_START);
1275         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1276         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1277
1278         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1279         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1280
1281         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1282         buffer[count++] = cpu_to_le32(0);
1283 }
1284
1285 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1286 {
1287         if (adev->asic_type == CHIP_CARRIZO)
1288                 return 5;
1289         else
1290                 return 4;
1291 }
1292
1293 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1294 {
1295         const struct cs_section_def *cs_data;
1296         int r;
1297
1298         adev->gfx.rlc.cs_data = vi_cs_data;
1299
1300         cs_data = adev->gfx.rlc.cs_data;
1301
1302         if (cs_data) {
1303                 /* init clear state block */
1304                 r = amdgpu_gfx_rlc_init_csb(adev);
1305                 if (r)
1306                         return r;
1307         }
1308
1309         if ((adev->asic_type == CHIP_CARRIZO) ||
1310             (adev->asic_type == CHIP_STONEY)) {
1311                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1312                 r = amdgpu_gfx_rlc_init_cpt(adev);
1313                 if (r)
1314                         return r;
1315         }
1316
1317         return 0;
1318 }
1319
1320 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1321 {
1322         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1323 }
1324
1325 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1326 {
1327         int r;
1328         u32 *hpd;
1329         size_t mec_hpd_size;
1330
1331         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1332
1333         /* take ownership of the relevant compute queues */
1334         amdgpu_gfx_compute_queue_acquire(adev);
1335
1336         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1337
1338         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1339                                       AMDGPU_GEM_DOMAIN_VRAM,
1340                                       &adev->gfx.mec.hpd_eop_obj,
1341                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1342                                       (void **)&hpd);
1343         if (r) {
1344                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1345                 return r;
1346         }
1347
1348         memset(hpd, 0, mec_hpd_size);
1349
1350         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1351         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1352
1353         return 0;
1354 }
1355
1356 static const u32 vgpr_init_compute_shader[] =
1357 {
1358         0x7e000209, 0x7e020208,
1359         0x7e040207, 0x7e060206,
1360         0x7e080205, 0x7e0a0204,
1361         0x7e0c0203, 0x7e0e0202,
1362         0x7e100201, 0x7e120200,
1363         0x7e140209, 0x7e160208,
1364         0x7e180207, 0x7e1a0206,
1365         0x7e1c0205, 0x7e1e0204,
1366         0x7e200203, 0x7e220202,
1367         0x7e240201, 0x7e260200,
1368         0x7e280209, 0x7e2a0208,
1369         0x7e2c0207, 0x7e2e0206,
1370         0x7e300205, 0x7e320204,
1371         0x7e340203, 0x7e360202,
1372         0x7e380201, 0x7e3a0200,
1373         0x7e3c0209, 0x7e3e0208,
1374         0x7e400207, 0x7e420206,
1375         0x7e440205, 0x7e460204,
1376         0x7e480203, 0x7e4a0202,
1377         0x7e4c0201, 0x7e4e0200,
1378         0x7e500209, 0x7e520208,
1379         0x7e540207, 0x7e560206,
1380         0x7e580205, 0x7e5a0204,
1381         0x7e5c0203, 0x7e5e0202,
1382         0x7e600201, 0x7e620200,
1383         0x7e640209, 0x7e660208,
1384         0x7e680207, 0x7e6a0206,
1385         0x7e6c0205, 0x7e6e0204,
1386         0x7e700203, 0x7e720202,
1387         0x7e740201, 0x7e760200,
1388         0x7e780209, 0x7e7a0208,
1389         0x7e7c0207, 0x7e7e0206,
1390         0xbf8a0000, 0xbf810000,
1391 };
1392
1393 static const u32 sgpr_init_compute_shader[] =
1394 {
1395         0xbe8a0100, 0xbe8c0102,
1396         0xbe8e0104, 0xbe900106,
1397         0xbe920108, 0xbe940100,
1398         0xbe960102, 0xbe980104,
1399         0xbe9a0106, 0xbe9c0108,
1400         0xbe9e0100, 0xbea00102,
1401         0xbea20104, 0xbea40106,
1402         0xbea60108, 0xbea80100,
1403         0xbeaa0102, 0xbeac0104,
1404         0xbeae0106, 0xbeb00108,
1405         0xbeb20100, 0xbeb40102,
1406         0xbeb60104, 0xbeb80106,
1407         0xbeba0108, 0xbebc0100,
1408         0xbebe0102, 0xbec00104,
1409         0xbec20106, 0xbec40108,
1410         0xbec60100, 0xbec80102,
1411         0xbee60004, 0xbee70005,
1412         0xbeea0006, 0xbeeb0007,
1413         0xbee80008, 0xbee90009,
1414         0xbefc0000, 0xbf8a0000,
1415         0xbf810000, 0x00000000,
1416 };
1417
1418 static const u32 vgpr_init_regs[] =
1419 {
1420         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1421         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1422         mmCOMPUTE_NUM_THREAD_X, 256*4,
1423         mmCOMPUTE_NUM_THREAD_Y, 1,
1424         mmCOMPUTE_NUM_THREAD_Z, 1,
1425         mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1426         mmCOMPUTE_PGM_RSRC2, 20,
1427         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1428         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1429         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1430         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1431         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1432         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1433         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1434         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1435         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1436         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1437 };
1438
1439 static const u32 sgpr1_init_regs[] =
1440 {
1441         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1442         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1443         mmCOMPUTE_NUM_THREAD_X, 256*5,
1444         mmCOMPUTE_NUM_THREAD_Y, 1,
1445         mmCOMPUTE_NUM_THREAD_Z, 1,
1446         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1447         mmCOMPUTE_PGM_RSRC2, 20,
1448         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1449         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1450         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1451         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1452         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1453         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1454         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1455         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1456         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1457         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1458 };
1459
1460 static const u32 sgpr2_init_regs[] =
1461 {
1462         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1463         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1464         mmCOMPUTE_NUM_THREAD_X, 256*5,
1465         mmCOMPUTE_NUM_THREAD_Y, 1,
1466         mmCOMPUTE_NUM_THREAD_Z, 1,
1467         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1468         mmCOMPUTE_PGM_RSRC2, 20,
1469         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1470         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1471         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1472         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1473         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1474         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1475         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1476         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1477         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1478         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1479 };
1480
1481 static const u32 sec_ded_counter_registers[] =
1482 {
1483         mmCPC_EDC_ATC_CNT,
1484         mmCPC_EDC_SCRATCH_CNT,
1485         mmCPC_EDC_UCODE_CNT,
1486         mmCPF_EDC_ATC_CNT,
1487         mmCPF_EDC_ROQ_CNT,
1488         mmCPF_EDC_TAG_CNT,
1489         mmCPG_EDC_ATC_CNT,
1490         mmCPG_EDC_DMA_CNT,
1491         mmCPG_EDC_TAG_CNT,
1492         mmDC_EDC_CSINVOC_CNT,
1493         mmDC_EDC_RESTORE_CNT,
1494         mmDC_EDC_STATE_CNT,
1495         mmGDS_EDC_CNT,
1496         mmGDS_EDC_GRBM_CNT,
1497         mmGDS_EDC_OA_DED,
1498         mmSPI_EDC_CNT,
1499         mmSQC_ATC_EDC_GATCL1_CNT,
1500         mmSQC_EDC_CNT,
1501         mmSQ_EDC_DED_CNT,
1502         mmSQ_EDC_INFO,
1503         mmSQ_EDC_SEC_CNT,
1504         mmTCC_EDC_CNT,
1505         mmTCP_ATC_EDC_GATCL1_CNT,
1506         mmTCP_EDC_CNT,
1507         mmTD_EDC_CNT
1508 };
1509
1510 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1511 {
1512         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1513         struct amdgpu_ib ib;
1514         struct dma_fence *f = NULL;
1515         int r, i;
1516         u32 tmp;
1517         unsigned total_size, vgpr_offset, sgpr_offset;
1518         u64 gpu_addr;
1519
1520         /* only supported on CZ */
1521         if (adev->asic_type != CHIP_CARRIZO)
1522                 return 0;
1523
1524         /* bail if the compute ring is not ready */
1525         if (!ring->sched.ready)
1526                 return 0;
1527
1528         tmp = RREG32(mmGB_EDC_MODE);
1529         WREG32(mmGB_EDC_MODE, 0);
1530
1531         total_size =
1532                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1533         total_size +=
1534                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1535         total_size +=
1536                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1537         total_size = ALIGN(total_size, 256);
1538         vgpr_offset = total_size;
1539         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1540         sgpr_offset = total_size;
1541         total_size += sizeof(sgpr_init_compute_shader);
1542
1543         /* allocate an indirect buffer to put the commands in */
1544         memset(&ib, 0, sizeof(ib));
1545         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1546         if (r) {
1547                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1548                 return r;
1549         }
1550
1551         /* load the compute shaders */
1552         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1553                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1554
1555         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1556                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1557
1558         /* init the ib length to 0 */
1559         ib.length_dw = 0;
1560
1561         /* VGPR */
1562         /* write the register state for the compute dispatch */
1563         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1564                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1565                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1566                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1567         }
1568         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1569         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1570         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1571         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1572         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1573         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1574
1575         /* write dispatch packet */
1576         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1577         ib.ptr[ib.length_dw++] = 8; /* x */
1578         ib.ptr[ib.length_dw++] = 1; /* y */
1579         ib.ptr[ib.length_dw++] = 1; /* z */
1580         ib.ptr[ib.length_dw++] =
1581                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1582
1583         /* write CS partial flush packet */
1584         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1585         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1586
1587         /* SGPR1 */
1588         /* write the register state for the compute dispatch */
1589         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1590                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1591                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1592                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1593         }
1594         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1595         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1596         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1597         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1598         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1599         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1600
1601         /* write dispatch packet */
1602         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1603         ib.ptr[ib.length_dw++] = 8; /* x */
1604         ib.ptr[ib.length_dw++] = 1; /* y */
1605         ib.ptr[ib.length_dw++] = 1; /* z */
1606         ib.ptr[ib.length_dw++] =
1607                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1608
1609         /* write CS partial flush packet */
1610         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1611         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1612
1613         /* SGPR2 */
1614         /* write the register state for the compute dispatch */
1615         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1616                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1617                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1618                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1619         }
1620         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1621         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1622         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1623         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1624         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1625         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1626
1627         /* write dispatch packet */
1628         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1629         ib.ptr[ib.length_dw++] = 8; /* x */
1630         ib.ptr[ib.length_dw++] = 1; /* y */
1631         ib.ptr[ib.length_dw++] = 1; /* z */
1632         ib.ptr[ib.length_dw++] =
1633                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1634
1635         /* write CS partial flush packet */
1636         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1637         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1638
1639         /* shedule the ib on the ring */
1640         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1641         if (r) {
1642                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1643                 goto fail;
1644         }
1645
1646         /* wait for the GPU to finish processing the IB */
1647         r = dma_fence_wait(f, false);
1648         if (r) {
1649                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1650                 goto fail;
1651         }
1652
1653         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1654         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1655         WREG32(mmGB_EDC_MODE, tmp);
1656
1657         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1658         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1659         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1660
1661
1662         /* read back registers to clear the counters */
1663         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1664                 RREG32(sec_ded_counter_registers[i]);
1665
1666 fail:
1667         amdgpu_ib_free(adev, &ib, NULL);
1668         dma_fence_put(f);
1669
1670         return r;
1671 }
1672
1673 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1674 {
1675         u32 gb_addr_config;
1676         u32 mc_shared_chmap, mc_arb_ramcfg;
1677         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1678         u32 tmp;
1679         int ret;
1680
1681         switch (adev->asic_type) {
1682         case CHIP_TOPAZ:
1683                 adev->gfx.config.max_shader_engines = 1;
1684                 adev->gfx.config.max_tile_pipes = 2;
1685                 adev->gfx.config.max_cu_per_sh = 6;
1686                 adev->gfx.config.max_sh_per_se = 1;
1687                 adev->gfx.config.max_backends_per_se = 2;
1688                 adev->gfx.config.max_texture_channel_caches = 2;
1689                 adev->gfx.config.max_gprs = 256;
1690                 adev->gfx.config.max_gs_threads = 32;
1691                 adev->gfx.config.max_hw_contexts = 8;
1692
1693                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1694                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1695                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1696                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1697                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1698                 break;
1699         case CHIP_FIJI:
1700                 adev->gfx.config.max_shader_engines = 4;
1701                 adev->gfx.config.max_tile_pipes = 16;
1702                 adev->gfx.config.max_cu_per_sh = 16;
1703                 adev->gfx.config.max_sh_per_se = 1;
1704                 adev->gfx.config.max_backends_per_se = 4;
1705                 adev->gfx.config.max_texture_channel_caches = 16;
1706                 adev->gfx.config.max_gprs = 256;
1707                 adev->gfx.config.max_gs_threads = 32;
1708                 adev->gfx.config.max_hw_contexts = 8;
1709
1710                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1711                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1712                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1713                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1714                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1715                 break;
1716         case CHIP_POLARIS11:
1717         case CHIP_POLARIS12:
1718                 ret = amdgpu_atombios_get_gfx_info(adev);
1719                 if (ret)
1720                         return ret;
1721                 adev->gfx.config.max_gprs = 256;
1722                 adev->gfx.config.max_gs_threads = 32;
1723                 adev->gfx.config.max_hw_contexts = 8;
1724
1725                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1726                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1727                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1728                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1729                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1730                 break;
1731         case CHIP_POLARIS10:
1732         case CHIP_VEGAM:
1733                 ret = amdgpu_atombios_get_gfx_info(adev);
1734                 if (ret)
1735                         return ret;
1736                 adev->gfx.config.max_gprs = 256;
1737                 adev->gfx.config.max_gs_threads = 32;
1738                 adev->gfx.config.max_hw_contexts = 8;
1739
1740                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1741                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1742                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1743                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1744                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1745                 break;
1746         case CHIP_TONGA:
1747                 adev->gfx.config.max_shader_engines = 4;
1748                 adev->gfx.config.max_tile_pipes = 8;
1749                 adev->gfx.config.max_cu_per_sh = 8;
1750                 adev->gfx.config.max_sh_per_se = 1;
1751                 adev->gfx.config.max_backends_per_se = 2;
1752                 adev->gfx.config.max_texture_channel_caches = 8;
1753                 adev->gfx.config.max_gprs = 256;
1754                 adev->gfx.config.max_gs_threads = 32;
1755                 adev->gfx.config.max_hw_contexts = 8;
1756
1757                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1758                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1759                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1760                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1761                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1762                 break;
1763         case CHIP_CARRIZO:
1764                 adev->gfx.config.max_shader_engines = 1;
1765                 adev->gfx.config.max_tile_pipes = 2;
1766                 adev->gfx.config.max_sh_per_se = 1;
1767                 adev->gfx.config.max_backends_per_se = 2;
1768                 adev->gfx.config.max_cu_per_sh = 8;
1769                 adev->gfx.config.max_texture_channel_caches = 2;
1770                 adev->gfx.config.max_gprs = 256;
1771                 adev->gfx.config.max_gs_threads = 32;
1772                 adev->gfx.config.max_hw_contexts = 8;
1773
1774                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1775                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1776                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1777                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1778                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1779                 break;
1780         case CHIP_STONEY:
1781                 adev->gfx.config.max_shader_engines = 1;
1782                 adev->gfx.config.max_tile_pipes = 2;
1783                 adev->gfx.config.max_sh_per_se = 1;
1784                 adev->gfx.config.max_backends_per_se = 1;
1785                 adev->gfx.config.max_cu_per_sh = 3;
1786                 adev->gfx.config.max_texture_channel_caches = 2;
1787                 adev->gfx.config.max_gprs = 256;
1788                 adev->gfx.config.max_gs_threads = 16;
1789                 adev->gfx.config.max_hw_contexts = 8;
1790
1791                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1792                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1793                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1794                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1795                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1796                 break;
1797         default:
1798                 adev->gfx.config.max_shader_engines = 2;
1799                 adev->gfx.config.max_tile_pipes = 4;
1800                 adev->gfx.config.max_cu_per_sh = 2;
1801                 adev->gfx.config.max_sh_per_se = 1;
1802                 adev->gfx.config.max_backends_per_se = 2;
1803                 adev->gfx.config.max_texture_channel_caches = 4;
1804                 adev->gfx.config.max_gprs = 256;
1805                 adev->gfx.config.max_gs_threads = 32;
1806                 adev->gfx.config.max_hw_contexts = 8;
1807
1808                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1809                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1810                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1811                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1812                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1813                 break;
1814         }
1815
1816         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1817         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1818         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1819
1820         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1821         adev->gfx.config.mem_max_burst_length_bytes = 256;
1822         if (adev->flags & AMD_IS_APU) {
1823                 /* Get memory bank mapping mode. */
1824                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1825                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1826                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1827
1828                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1829                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1830                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1831
1832                 /* Validate settings in case only one DIMM installed. */
1833                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1834                         dimm00_addr_map = 0;
1835                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1836                         dimm01_addr_map = 0;
1837                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1838                         dimm10_addr_map = 0;
1839                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1840                         dimm11_addr_map = 0;
1841
1842                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1843                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1844                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1845                         adev->gfx.config.mem_row_size_in_kb = 2;
1846                 else
1847                         adev->gfx.config.mem_row_size_in_kb = 1;
1848         } else {
1849                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1850                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1851                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1852                         adev->gfx.config.mem_row_size_in_kb = 4;
1853         }
1854
1855         adev->gfx.config.shader_engine_tile_size = 32;
1856         adev->gfx.config.num_gpus = 1;
1857         adev->gfx.config.multi_gpu_tile_size = 64;
1858
1859         /* fix up row size */
1860         switch (adev->gfx.config.mem_row_size_in_kb) {
1861         case 1:
1862         default:
1863                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1864                 break;
1865         case 2:
1866                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1867                 break;
1868         case 4:
1869                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1870                 break;
1871         }
1872         adev->gfx.config.gb_addr_config = gb_addr_config;
1873
1874         return 0;
1875 }
1876
1877 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1878                                         int mec, int pipe, int queue)
1879 {
1880         int r;
1881         unsigned irq_type;
1882         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1883
1884         ring = &adev->gfx.compute_ring[ring_id];
1885
1886         /* mec0 is me1 */
1887         ring->me = mec + 1;
1888         ring->pipe = pipe;
1889         ring->queue = queue;
1890
1891         ring->ring_obj = NULL;
1892         ring->use_doorbell = true;
1893         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1894         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1895                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1896         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1897
1898         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1899                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1900                 + ring->pipe;
1901
1902         /* type-2 packets are deprecated on MEC, use type-3 instead */
1903         r = amdgpu_ring_init(adev, ring, 1024,
1904                         &adev->gfx.eop_irq, irq_type);
1905         if (r)
1906                 return r;
1907
1908
1909         return 0;
1910 }
1911
1912 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1913
1914 static int gfx_v8_0_sw_init(void *handle)
1915 {
1916         int i, j, k, r, ring_id;
1917         struct amdgpu_ring *ring;
1918         struct amdgpu_kiq *kiq;
1919         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1920
1921         switch (adev->asic_type) {
1922         case CHIP_TONGA:
1923         case CHIP_CARRIZO:
1924         case CHIP_FIJI:
1925         case CHIP_POLARIS10:
1926         case CHIP_POLARIS11:
1927         case CHIP_POLARIS12:
1928         case CHIP_VEGAM:
1929                 adev->gfx.mec.num_mec = 2;
1930                 break;
1931         case CHIP_TOPAZ:
1932         case CHIP_STONEY:
1933         default:
1934                 adev->gfx.mec.num_mec = 1;
1935                 break;
1936         }
1937
1938         adev->gfx.mec.num_pipe_per_mec = 4;
1939         adev->gfx.mec.num_queue_per_pipe = 8;
1940
1941         /* EOP Event */
1942         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1943         if (r)
1944                 return r;
1945
1946         /* Privileged reg */
1947         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1948                               &adev->gfx.priv_reg_irq);
1949         if (r)
1950                 return r;
1951
1952         /* Privileged inst */
1953         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1954                               &adev->gfx.priv_inst_irq);
1955         if (r)
1956                 return r;
1957
1958         /* Add CP EDC/ECC irq  */
1959         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1960                               &adev->gfx.cp_ecc_error_irq);
1961         if (r)
1962                 return r;
1963
1964         /* SQ interrupts. */
1965         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1966                               &adev->gfx.sq_irq);
1967         if (r) {
1968                 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1969                 return r;
1970         }
1971
1972         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1973
1974         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1975
1976         gfx_v8_0_scratch_init(adev);
1977
1978         r = gfx_v8_0_init_microcode(adev);
1979         if (r) {
1980                 DRM_ERROR("Failed to load gfx firmware!\n");
1981                 return r;
1982         }
1983
1984         r = adev->gfx.rlc.funcs->init(adev);
1985         if (r) {
1986                 DRM_ERROR("Failed to init rlc BOs!\n");
1987                 return r;
1988         }
1989
1990         r = gfx_v8_0_mec_init(adev);
1991         if (r) {
1992                 DRM_ERROR("Failed to init MEC BOs!\n");
1993                 return r;
1994         }
1995
1996         /* set up the gfx ring */
1997         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1998                 ring = &adev->gfx.gfx_ring[i];
1999                 ring->ring_obj = NULL;
2000                 sprintf(ring->name, "gfx");
2001                 /* no gfx doorbells on iceland */
2002                 if (adev->asic_type != CHIP_TOPAZ) {
2003                         ring->use_doorbell = true;
2004                         ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2005                 }
2006
2007                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2008                                      AMDGPU_CP_IRQ_GFX_EOP);
2009                 if (r)
2010                         return r;
2011         }
2012
2013
2014         /* set up the compute queues - allocate horizontally across pipes */
2015         ring_id = 0;
2016         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2017                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2018                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2019                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2020                                         continue;
2021
2022                                 r = gfx_v8_0_compute_ring_init(adev,
2023                                                                 ring_id,
2024                                                                 i, k, j);
2025                                 if (r)
2026                                         return r;
2027
2028                                 ring_id++;
2029                         }
2030                 }
2031         }
2032
2033         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2034         if (r) {
2035                 DRM_ERROR("Failed to init KIQ BOs!\n");
2036                 return r;
2037         }
2038
2039         kiq = &adev->gfx.kiq;
2040         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2041         if (r)
2042                 return r;
2043
2044         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2045         r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2046         if (r)
2047                 return r;
2048
2049         adev->gfx.ce_ram_size = 0x8000;
2050
2051         r = gfx_v8_0_gpu_early_init(adev);
2052         if (r)
2053                 return r;
2054
2055         return 0;
2056 }
2057
2058 static int gfx_v8_0_sw_fini(void *handle)
2059 {
2060         int i;
2061         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2062
2063         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2064         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2065         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2066
2067         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2068                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2069         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2070                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2071
2072         amdgpu_gfx_compute_mqd_sw_fini(adev);
2073         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2074         amdgpu_gfx_kiq_fini(adev);
2075
2076         gfx_v8_0_mec_fini(adev);
2077         amdgpu_gfx_rlc_fini(adev);
2078         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2079                                 &adev->gfx.rlc.clear_state_gpu_addr,
2080                                 (void **)&adev->gfx.rlc.cs_ptr);
2081         if ((adev->asic_type == CHIP_CARRIZO) ||
2082             (adev->asic_type == CHIP_STONEY)) {
2083                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2084                                 &adev->gfx.rlc.cp_table_gpu_addr,
2085                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2086         }
2087         gfx_v8_0_free_microcode(adev);
2088
2089         return 0;
2090 }
2091
2092 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2093 {
2094         uint32_t *modearray, *mod2array;
2095         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2096         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2097         u32 reg_offset;
2098
2099         modearray = adev->gfx.config.tile_mode_array;
2100         mod2array = adev->gfx.config.macrotile_mode_array;
2101
2102         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2103                 modearray[reg_offset] = 0;
2104
2105         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2106                 mod2array[reg_offset] = 0;
2107
2108         switch (adev->asic_type) {
2109         case CHIP_TOPAZ:
2110                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2111                                 PIPE_CONFIG(ADDR_SURF_P2) |
2112                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2113                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2114                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2115                                 PIPE_CONFIG(ADDR_SURF_P2) |
2116                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2117                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2118                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2119                                 PIPE_CONFIG(ADDR_SURF_P2) |
2120                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2122                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123                                 PIPE_CONFIG(ADDR_SURF_P2) |
2124                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2125                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2126                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127                                 PIPE_CONFIG(ADDR_SURF_P2) |
2128                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2129                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2130                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2131                                 PIPE_CONFIG(ADDR_SURF_P2) |
2132                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2133                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2134                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2135                                 PIPE_CONFIG(ADDR_SURF_P2) |
2136                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2137                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2138                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2139                                 PIPE_CONFIG(ADDR_SURF_P2));
2140                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2141                                 PIPE_CONFIG(ADDR_SURF_P2) |
2142                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2143                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2144                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145                                  PIPE_CONFIG(ADDR_SURF_P2) |
2146                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2147                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2148                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2149                                  PIPE_CONFIG(ADDR_SURF_P2) |
2150                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2151                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2152                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2153                                  PIPE_CONFIG(ADDR_SURF_P2) |
2154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2156                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157                                  PIPE_CONFIG(ADDR_SURF_P2) |
2158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2160                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2161                                  PIPE_CONFIG(ADDR_SURF_P2) |
2162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2164                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2165                                  PIPE_CONFIG(ADDR_SURF_P2) |
2166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2168                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2169                                  PIPE_CONFIG(ADDR_SURF_P2) |
2170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2172                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2173                                  PIPE_CONFIG(ADDR_SURF_P2) |
2174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2176                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2177                                  PIPE_CONFIG(ADDR_SURF_P2) |
2178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2180                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2181                                  PIPE_CONFIG(ADDR_SURF_P2) |
2182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2184                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2185                                  PIPE_CONFIG(ADDR_SURF_P2) |
2186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2188                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2189                                  PIPE_CONFIG(ADDR_SURF_P2) |
2190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2192                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2193                                  PIPE_CONFIG(ADDR_SURF_P2) |
2194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2196                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2197                                  PIPE_CONFIG(ADDR_SURF_P2) |
2198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2200                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2201                                  PIPE_CONFIG(ADDR_SURF_P2) |
2202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2204                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2205                                  PIPE_CONFIG(ADDR_SURF_P2) |
2206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2208                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2209                                  PIPE_CONFIG(ADDR_SURF_P2) |
2210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2212
2213                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2214                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2215                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2216                                 NUM_BANKS(ADDR_SURF_8_BANK));
2217                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2218                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2219                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2220                                 NUM_BANKS(ADDR_SURF_8_BANK));
2221                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2222                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2224                                 NUM_BANKS(ADDR_SURF_8_BANK));
2225                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2226                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2227                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2228                                 NUM_BANKS(ADDR_SURF_8_BANK));
2229                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2230                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2231                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2232                                 NUM_BANKS(ADDR_SURF_8_BANK));
2233                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2235                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2236                                 NUM_BANKS(ADDR_SURF_8_BANK));
2237                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2239                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240                                 NUM_BANKS(ADDR_SURF_8_BANK));
2241                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2242                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2243                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2244                                 NUM_BANKS(ADDR_SURF_16_BANK));
2245                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2246                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2247                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2248                                 NUM_BANKS(ADDR_SURF_16_BANK));
2249                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2250                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2251                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252                                  NUM_BANKS(ADDR_SURF_16_BANK));
2253                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2254                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2255                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2256                                  NUM_BANKS(ADDR_SURF_16_BANK));
2257                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2258                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2259                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2260                                  NUM_BANKS(ADDR_SURF_16_BANK));
2261                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2264                                  NUM_BANKS(ADDR_SURF_16_BANK));
2265                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2267                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2268                                  NUM_BANKS(ADDR_SURF_8_BANK));
2269
2270                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2271                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2272                             reg_offset != 23)
2273                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2274
2275                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2276                         if (reg_offset != 7)
2277                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2278
2279                 break;
2280         case CHIP_FIJI:
2281         case CHIP_VEGAM:
2282                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2283                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2285                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2286                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2287                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2289                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2290                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2293                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2294                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2298                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2301                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2302                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2303                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2305                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2306                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2307                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2309                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2311                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2312                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2313                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2315                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2316                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2317                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2319                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2324                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2325                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2328                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2329                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2332                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2333                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2336                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2340                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2341                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2345                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2348                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2349                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2352                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2353                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2356                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2357                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2360                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2361                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2364                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2365                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2368                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2369                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2372                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2373                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2376                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2377                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2381                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2385                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2387                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2389                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2391                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2392                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2395                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2396                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2397                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2400                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2401                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2403                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2404
2405                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2406                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2407                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2408                                 NUM_BANKS(ADDR_SURF_8_BANK));
2409                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2411                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2412                                 NUM_BANKS(ADDR_SURF_8_BANK));
2413                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2415                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2416                                 NUM_BANKS(ADDR_SURF_8_BANK));
2417                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2418                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2420                                 NUM_BANKS(ADDR_SURF_8_BANK));
2421                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2423                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424                                 NUM_BANKS(ADDR_SURF_8_BANK));
2425                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2427                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2428                                 NUM_BANKS(ADDR_SURF_8_BANK));
2429                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2431                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2432                                 NUM_BANKS(ADDR_SURF_8_BANK));
2433                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2435                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2436                                 NUM_BANKS(ADDR_SURF_8_BANK));
2437                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2439                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2440                                 NUM_BANKS(ADDR_SURF_8_BANK));
2441                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2443                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2444                                  NUM_BANKS(ADDR_SURF_8_BANK));
2445                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2447                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2448                                  NUM_BANKS(ADDR_SURF_8_BANK));
2449                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                                  NUM_BANKS(ADDR_SURF_8_BANK));
2453                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456                                  NUM_BANKS(ADDR_SURF_8_BANK));
2457                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460                                  NUM_BANKS(ADDR_SURF_4_BANK));
2461
2462                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2463                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2464
2465                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2466                         if (reg_offset != 7)
2467                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2468
2469                 break;
2470         case CHIP_TONGA:
2471                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2472                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2474                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2475                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2476                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2478                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2479                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2482                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2483                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2486                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2487                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2488                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2490                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2491                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2492                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2494                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2495                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2496                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2498                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2500                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2501                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2502                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2504                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2505                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2506                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2508                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2509                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2510                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2512                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2513                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2514                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2516                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2517                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2518                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2519                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2520                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2521                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2522                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2524                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2525                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2529                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2530                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2532                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2534                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2536                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2537                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2540                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2541                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2542                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2544                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2545                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2546                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2548                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2549                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2550                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2552                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2553                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2554                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2556                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2557                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2558                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2560                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2561                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2562                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2563                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2564                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2565                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2566                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2570                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2574                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2576                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2578                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2580                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2581                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2584                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2585                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2586                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2589                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2590                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2591                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2592                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2593
2594                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2595                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2596                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2597                                 NUM_BANKS(ADDR_SURF_16_BANK));
2598                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2600                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2601                                 NUM_BANKS(ADDR_SURF_16_BANK));
2602                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2604                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2605                                 NUM_BANKS(ADDR_SURF_16_BANK));
2606                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2609                                 NUM_BANKS(ADDR_SURF_16_BANK));
2610                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2612                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2613                                 NUM_BANKS(ADDR_SURF_16_BANK));
2614                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2616                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2617                                 NUM_BANKS(ADDR_SURF_16_BANK));
2618                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2620                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2621                                 NUM_BANKS(ADDR_SURF_16_BANK));
2622                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2624                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2625                                 NUM_BANKS(ADDR_SURF_16_BANK));
2626                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2628                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2629                                 NUM_BANKS(ADDR_SURF_16_BANK));
2630                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2632                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2633                                  NUM_BANKS(ADDR_SURF_16_BANK));
2634                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2636                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2637                                  NUM_BANKS(ADDR_SURF_16_BANK));
2638                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2640                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2641                                  NUM_BANKS(ADDR_SURF_8_BANK));
2642                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2645                                  NUM_BANKS(ADDR_SURF_4_BANK));
2646                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2648                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2649                                  NUM_BANKS(ADDR_SURF_4_BANK));
2650
2651                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2652                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2653
2654                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2655                         if (reg_offset != 7)
2656                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2657
2658                 break;
2659         case CHIP_POLARIS11:
2660         case CHIP_POLARIS12:
2661                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2664                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2665                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2668                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2669                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2672                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2673                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2676                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2677                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2680                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2681                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2682                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2684                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2685                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2688                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2690                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2692                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2695                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2696                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2698                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2702                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2704                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2706                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2707                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2708                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2711                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2712                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2720                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2722                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2726                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2727                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2730                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2731                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2735                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2736                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2738                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2739                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2743                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2747                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2751                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2755                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2771                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2775                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2776                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2778                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2779                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2780                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2782                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2783
2784                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2785                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2786                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2787                                 NUM_BANKS(ADDR_SURF_16_BANK));
2788
2789                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2791                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2792                                 NUM_BANKS(ADDR_SURF_16_BANK));
2793
2794                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2796                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2797                                 NUM_BANKS(ADDR_SURF_16_BANK));
2798
2799                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2800                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2801                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2802                                 NUM_BANKS(ADDR_SURF_16_BANK));
2803
2804                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2807                                 NUM_BANKS(ADDR_SURF_16_BANK));
2808
2809                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2810                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2811                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2812                                 NUM_BANKS(ADDR_SURF_16_BANK));
2813
2814                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2815                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2816                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2817                                 NUM_BANKS(ADDR_SURF_16_BANK));
2818
2819                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2820                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2821                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2822                                 NUM_BANKS(ADDR_SURF_16_BANK));
2823
2824                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2825                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2826                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2827                                 NUM_BANKS(ADDR_SURF_16_BANK));
2828
2829                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2831                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2832                                 NUM_BANKS(ADDR_SURF_16_BANK));
2833
2834                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2836                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2837                                 NUM_BANKS(ADDR_SURF_16_BANK));
2838
2839                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2841                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2842                                 NUM_BANKS(ADDR_SURF_16_BANK));
2843
2844                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847                                 NUM_BANKS(ADDR_SURF_8_BANK));
2848
2849                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2852                                 NUM_BANKS(ADDR_SURF_4_BANK));
2853
2854                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2855                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2856
2857                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2858                         if (reg_offset != 7)
2859                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2860
2861                 break;
2862         case CHIP_POLARIS10:
2863                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2864                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2865                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2866                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2867                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2868                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2869                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2870                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2871                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2874                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2875                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2878                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2879                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2882                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2883                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2884                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2886                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2887                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2892                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2893                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2897                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2898                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2899                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2900                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2901                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2904                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2905                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2906                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2908                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2909                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2910                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2913                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2914                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2916                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2920                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2922                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2924                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2926                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2928                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2929                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2932                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2933                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2934                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2936                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2937                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2938                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2940                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2941                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2942                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2945                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2946                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2949                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2953                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2954                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2957                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2962                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2970                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2973                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2974                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2977                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2978                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2980                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2981                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2982                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2984                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2985
2986                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2988                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2989                                 NUM_BANKS(ADDR_SURF_16_BANK));
2990
2991                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2993                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                 NUM_BANKS(ADDR_SURF_16_BANK));
2995
2996                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2997                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999                                 NUM_BANKS(ADDR_SURF_16_BANK));
3000
3001                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3002                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3003                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3004                                 NUM_BANKS(ADDR_SURF_16_BANK));
3005
3006                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3007                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3008                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3009                                 NUM_BANKS(ADDR_SURF_16_BANK));
3010
3011                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3012                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3013                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3014                                 NUM_BANKS(ADDR_SURF_16_BANK));
3015
3016                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3017                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3018                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3019                                 NUM_BANKS(ADDR_SURF_16_BANK));
3020
3021                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3023                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3024                                 NUM_BANKS(ADDR_SURF_16_BANK));
3025
3026                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3028                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3029                                 NUM_BANKS(ADDR_SURF_16_BANK));
3030
3031                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3033                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3034                                 NUM_BANKS(ADDR_SURF_16_BANK));
3035
3036                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3038                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3039                                 NUM_BANKS(ADDR_SURF_16_BANK));
3040
3041                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3043                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3044                                 NUM_BANKS(ADDR_SURF_8_BANK));
3045
3046                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3049                                 NUM_BANKS(ADDR_SURF_4_BANK));
3050
3051                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3053                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3054                                 NUM_BANKS(ADDR_SURF_4_BANK));
3055
3056                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3057                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3058
3059                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3060                         if (reg_offset != 7)
3061                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3062
3063                 break;
3064         case CHIP_STONEY:
3065                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3066                                 PIPE_CONFIG(ADDR_SURF_P2) |
3067                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3068                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3069                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3070                                 PIPE_CONFIG(ADDR_SURF_P2) |
3071                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3072                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3073                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3074                                 PIPE_CONFIG(ADDR_SURF_P2) |
3075                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3076                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3077                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3078                                 PIPE_CONFIG(ADDR_SURF_P2) |
3079                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3080                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3081                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3082                                 PIPE_CONFIG(ADDR_SURF_P2) |
3083                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3084                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3085                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3086                                 PIPE_CONFIG(ADDR_SURF_P2) |
3087                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3088                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3089                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3090                                 PIPE_CONFIG(ADDR_SURF_P2) |
3091                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3092                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3094                                 PIPE_CONFIG(ADDR_SURF_P2));
3095                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3096                                 PIPE_CONFIG(ADDR_SURF_P2) |
3097                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3098                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3099                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3100                                  PIPE_CONFIG(ADDR_SURF_P2) |
3101                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3102                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3103                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3104                                  PIPE_CONFIG(ADDR_SURF_P2) |
3105                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3106                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3107                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3108                                  PIPE_CONFIG(ADDR_SURF_P2) |
3109                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3110                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3112                                  PIPE_CONFIG(ADDR_SURF_P2) |
3113                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3114                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3115                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3116                                  PIPE_CONFIG(ADDR_SURF_P2) |
3117                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3118                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3119                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3120                                  PIPE_CONFIG(ADDR_SURF_P2) |
3121                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3122                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3123                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3124                                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3126                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3127                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3128                                  PIPE_CONFIG(ADDR_SURF_P2) |
3129                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3130                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3131                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3132                                  PIPE_CONFIG(ADDR_SURF_P2) |
3133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3135                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3136                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3138                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3139                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3140                                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3142                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3143                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3147                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3159                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3163                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3167
3168                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3169                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3170                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3171                                 NUM_BANKS(ADDR_SURF_8_BANK));
3172                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3173                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3174                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3175                                 NUM_BANKS(ADDR_SURF_8_BANK));
3176                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3178                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3179                                 NUM_BANKS(ADDR_SURF_8_BANK));
3180                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3182                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3183                                 NUM_BANKS(ADDR_SURF_8_BANK));
3184                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187                                 NUM_BANKS(ADDR_SURF_8_BANK));
3188                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3189                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3190                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3191                                 NUM_BANKS(ADDR_SURF_8_BANK));
3192                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3194                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3195                                 NUM_BANKS(ADDR_SURF_8_BANK));
3196                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3197                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3198                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3199                                 NUM_BANKS(ADDR_SURF_16_BANK));
3200                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3201                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3202                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3203                                 NUM_BANKS(ADDR_SURF_16_BANK));
3204                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3205                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3206                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3207                                  NUM_BANKS(ADDR_SURF_16_BANK));
3208                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3209                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3210                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211                                  NUM_BANKS(ADDR_SURF_16_BANK));
3212                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3214                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215                                  NUM_BANKS(ADDR_SURF_16_BANK));
3216                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3219                                  NUM_BANKS(ADDR_SURF_16_BANK));
3220                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3222                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3223                                  NUM_BANKS(ADDR_SURF_8_BANK));
3224
3225                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3226                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3227                             reg_offset != 23)
3228                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3229
3230                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3231                         if (reg_offset != 7)
3232                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3233
3234                 break;
3235         default:
3236                 dev_warn(adev->dev,
3237                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3238                          adev->asic_type);
3239
3240         case CHIP_CARRIZO:
3241                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3242                                 PIPE_CONFIG(ADDR_SURF_P2) |
3243                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3244                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3245                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3246                                 PIPE_CONFIG(ADDR_SURF_P2) |
3247                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3248                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3249                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3250                                 PIPE_CONFIG(ADDR_SURF_P2) |
3251                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3252                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3253                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3254                                 PIPE_CONFIG(ADDR_SURF_P2) |
3255                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3256                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3257                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3258                                 PIPE_CONFIG(ADDR_SURF_P2) |
3259                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3260                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3261                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3262                                 PIPE_CONFIG(ADDR_SURF_P2) |
3263                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3264                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3265                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3266                                 PIPE_CONFIG(ADDR_SURF_P2) |
3267                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3269                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3270                                 PIPE_CONFIG(ADDR_SURF_P2));
3271                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3272                                 PIPE_CONFIG(ADDR_SURF_P2) |
3273                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3274                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3275                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3276                                  PIPE_CONFIG(ADDR_SURF_P2) |
3277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3279                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3280                                  PIPE_CONFIG(ADDR_SURF_P2) |
3281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3283                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3284                                  PIPE_CONFIG(ADDR_SURF_P2) |
3285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3288                                  PIPE_CONFIG(ADDR_SURF_P2) |
3289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3291                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3292                                  PIPE_CONFIG(ADDR_SURF_P2) |
3293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3295                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3296                                  PIPE_CONFIG(ADDR_SURF_P2) |
3297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3299                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3300                                  PIPE_CONFIG(ADDR_SURF_P2) |
3301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3303                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3304                                  PIPE_CONFIG(ADDR_SURF_P2) |
3305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3307                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3308                                  PIPE_CONFIG(ADDR_SURF_P2) |
3309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3311                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3312                                  PIPE_CONFIG(ADDR_SURF_P2) |
3313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3315                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3316                                  PIPE_CONFIG(ADDR_SURF_P2) |
3317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3319                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3320                                  PIPE_CONFIG(ADDR_SURF_P2) |
3321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3323                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3324                                  PIPE_CONFIG(ADDR_SURF_P2) |
3325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3327                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3328                                  PIPE_CONFIG(ADDR_SURF_P2) |
3329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3331                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3332                                  PIPE_CONFIG(ADDR_SURF_P2) |
3333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3335                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3336                                  PIPE_CONFIG(ADDR_SURF_P2) |
3337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3339                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3340                                  PIPE_CONFIG(ADDR_SURF_P2) |
3341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3343
3344                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3345                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3346                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3347                                 NUM_BANKS(ADDR_SURF_8_BANK));
3348                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3349                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3350                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3351                                 NUM_BANKS(ADDR_SURF_8_BANK));
3352                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3353                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3354                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3355                                 NUM_BANKS(ADDR_SURF_8_BANK));
3356                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3357                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3358                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3359                                 NUM_BANKS(ADDR_SURF_8_BANK));
3360                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3361                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3362                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3363                                 NUM_BANKS(ADDR_SURF_8_BANK));
3364                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3365                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3366                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3367                                 NUM_BANKS(ADDR_SURF_8_BANK));
3368                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3370                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3371                                 NUM_BANKS(ADDR_SURF_8_BANK));
3372                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3373                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3374                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3375                                 NUM_BANKS(ADDR_SURF_16_BANK));
3376                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3377                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3378                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379                                 NUM_BANKS(ADDR_SURF_16_BANK));
3380                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3381                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3382                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3383                                  NUM_BANKS(ADDR_SURF_16_BANK));
3384                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3385                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3386                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3387                                  NUM_BANKS(ADDR_SURF_16_BANK));
3388                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3390                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3391                                  NUM_BANKS(ADDR_SURF_16_BANK));
3392                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3394                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3395                                  NUM_BANKS(ADDR_SURF_16_BANK));
3396                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3397                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3398                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3399                                  NUM_BANKS(ADDR_SURF_8_BANK));
3400
3401                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3402                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3403                             reg_offset != 23)
3404                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3405
3406                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3407                         if (reg_offset != 7)
3408                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3409
3410                 break;
3411         }
3412 }
3413
3414 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3415                                   u32 se_num, u32 sh_num, u32 instance)
3416 {
3417         u32 data;
3418
3419         if (instance == 0xffffffff)
3420                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3421         else
3422                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3423
3424         if (se_num == 0xffffffff)
3425                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3426         else
3427                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3428
3429         if (sh_num == 0xffffffff)
3430                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3431         else
3432                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3433
3434         WREG32(mmGRBM_GFX_INDEX, data);
3435 }
3436
3437 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3438                                   u32 me, u32 pipe, u32 q)
3439 {
3440         vi_srbm_select(adev, me, pipe, q, 0);
3441 }
3442
3443 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3444 {
3445         u32 data, mask;
3446
3447         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3448                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3449
3450         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3451
3452         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3453                                          adev->gfx.config.max_sh_per_se);
3454
3455         return (~data) & mask;
3456 }
3457
3458 static void
3459 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3460 {
3461         switch (adev->asic_type) {
3462         case CHIP_FIJI:
3463         case CHIP_VEGAM:
3464                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3465                           RB_XSEL2(1) | PKR_MAP(2) |
3466                           PKR_XSEL(1) | PKR_YSEL(1) |
3467                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3468                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3469                            SE_PAIR_YSEL(2);
3470                 break;
3471         case CHIP_TONGA:
3472         case CHIP_POLARIS10:
3473                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3474                           SE_XSEL(1) | SE_YSEL(1);
3475                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3476                            SE_PAIR_YSEL(2);
3477                 break;
3478         case CHIP_TOPAZ:
3479         case CHIP_CARRIZO:
3480                 *rconf |= RB_MAP_PKR0(2);
3481                 *rconf1 |= 0x0;
3482                 break;
3483         case CHIP_POLARIS11:
3484         case CHIP_POLARIS12:
3485                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3486                           SE_XSEL(1) | SE_YSEL(1);
3487                 *rconf1 |= 0x0;
3488                 break;
3489         case CHIP_STONEY:
3490                 *rconf |= 0x0;
3491                 *rconf1 |= 0x0;
3492                 break;
3493         default:
3494                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3495                 break;
3496         }
3497 }
3498
3499 static void
3500 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3501                                         u32 raster_config, u32 raster_config_1,
3502                                         unsigned rb_mask, unsigned num_rb)
3503 {
3504         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3505         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3506         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3507         unsigned rb_per_se = num_rb / num_se;
3508         unsigned se_mask[4];
3509         unsigned se;
3510
3511         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3512         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3513         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3514         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3515
3516         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3517         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3518         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3519
3520         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3521                              (!se_mask[2] && !se_mask[3]))) {
3522                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3523
3524                 if (!se_mask[0] && !se_mask[1]) {
3525                         raster_config_1 |=
3526                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3527                 } else {
3528                         raster_config_1 |=
3529                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3530                 }
3531         }
3532
3533         for (se = 0; se < num_se; se++) {
3534                 unsigned raster_config_se = raster_config;
3535                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3536                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3537                 int idx = (se / 2) * 2;
3538
3539                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3540                         raster_config_se &= ~SE_MAP_MASK;
3541
3542                         if (!se_mask[idx]) {
3543                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3544                         } else {
3545                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3546                         }
3547                 }
3548
3549                 pkr0_mask &= rb_mask;
3550                 pkr1_mask &= rb_mask;
3551                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3552                         raster_config_se &= ~PKR_MAP_MASK;
3553
3554                         if (!pkr0_mask) {
3555                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3556                         } else {
3557                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3558                         }
3559                 }
3560
3561                 if (rb_per_se >= 2) {
3562                         unsigned rb0_mask = 1 << (se * rb_per_se);
3563                         unsigned rb1_mask = rb0_mask << 1;
3564
3565                         rb0_mask &= rb_mask;
3566                         rb1_mask &= rb_mask;
3567                         if (!rb0_mask || !rb1_mask) {
3568                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3569
3570                                 if (!rb0_mask) {
3571                                         raster_config_se |=
3572                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3573                                 } else {
3574                                         raster_config_se |=
3575                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3576                                 }
3577                         }
3578
3579                         if (rb_per_se > 2) {
3580                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3581                                 rb1_mask = rb0_mask << 1;
3582                                 rb0_mask &= rb_mask;
3583                                 rb1_mask &= rb_mask;
3584                                 if (!rb0_mask || !rb1_mask) {
3585                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3586
3587                                         if (!rb0_mask) {
3588                                                 raster_config_se |=
3589                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3590                                         } else {
3591                                                 raster_config_se |=
3592                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3593                                         }
3594                                 }
3595                         }
3596                 }
3597
3598                 /* GRBM_GFX_INDEX has a different offset on VI */
3599                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3600                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3601                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3602         }
3603
3604         /* GRBM_GFX_INDEX has a different offset on VI */
3605         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3606 }
3607
3608 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3609 {
3610         int i, j;
3611         u32 data;
3612         u32 raster_config = 0, raster_config_1 = 0;
3613         u32 active_rbs = 0;
3614         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3615                                         adev->gfx.config.max_sh_per_se;
3616         unsigned num_rb_pipes;
3617
3618         mutex_lock(&adev->grbm_idx_mutex);
3619         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3620                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3621                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3622                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3623                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3624                                                rb_bitmap_width_per_sh);
3625                 }
3626         }
3627         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3628
3629         adev->gfx.config.backend_enable_mask = active_rbs;
3630         adev->gfx.config.num_rbs = hweight32(active_rbs);
3631
3632         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3633                              adev->gfx.config.max_shader_engines, 16);
3634
3635         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3636
3637         if (!adev->gfx.config.backend_enable_mask ||
3638                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3639                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3640                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3641         } else {
3642                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3643                                                         adev->gfx.config.backend_enable_mask,
3644                                                         num_rb_pipes);
3645         }
3646
3647         /* cache the values for userspace */
3648         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3649                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3650                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3651                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3652                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3653                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3654                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3655                         adev->gfx.config.rb_config[i][j].raster_config =
3656                                 RREG32(mmPA_SC_RASTER_CONFIG);
3657                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3658                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3659                 }
3660         }
3661         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3662         mutex_unlock(&adev->grbm_idx_mutex);
3663 }
3664
3665 /**
3666  * gfx_v8_0_init_compute_vmid - gart enable
3667  *
3668  * @adev: amdgpu_device pointer
3669  *
3670  * Initialize compute vmid sh_mem registers
3671  *
3672  */
3673 #define DEFAULT_SH_MEM_BASES    (0x6000)
3674 #define FIRST_COMPUTE_VMID      (8)
3675 #define LAST_COMPUTE_VMID       (16)
3676 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3677 {
3678         int i;
3679         uint32_t sh_mem_config;
3680         uint32_t sh_mem_bases;
3681
3682         /*
3683          * Configure apertures:
3684          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3685          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3686          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3687          */
3688         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3689
3690         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3691                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3692                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3693                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3694                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3695                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3696
3697         mutex_lock(&adev->srbm_mutex);
3698         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3699                 vi_srbm_select(adev, 0, 0, 0, i);
3700                 /* CP and shaders */
3701                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3702                 WREG32(mmSH_MEM_APE1_BASE, 1);
3703                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3704                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3705         }
3706         vi_srbm_select(adev, 0, 0, 0, 0);
3707         mutex_unlock(&adev->srbm_mutex);
3708 }
3709
3710 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3711 {
3712         switch (adev->asic_type) {
3713         default:
3714                 adev->gfx.config.double_offchip_lds_buf = 1;
3715                 break;
3716         case CHIP_CARRIZO:
3717         case CHIP_STONEY:
3718                 adev->gfx.config.double_offchip_lds_buf = 0;
3719                 break;
3720         }
3721 }
3722
3723 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3724 {
3725         u32 tmp, sh_static_mem_cfg;
3726         int i;
3727
3728         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3729         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3730         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3731         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3732
3733         gfx_v8_0_tiling_mode_table_init(adev);
3734         gfx_v8_0_setup_rb(adev);
3735         gfx_v8_0_get_cu_info(adev);
3736         gfx_v8_0_config_init(adev);
3737
3738         /* XXX SH_MEM regs */
3739         /* where to put LDS, scratch, GPUVM in FSA64 space */
3740         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3741                                    SWIZZLE_ENABLE, 1);
3742         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3743                                    ELEMENT_SIZE, 1);
3744         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3745                                    INDEX_STRIDE, 3);
3746         WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3747
3748         mutex_lock(&adev->srbm_mutex);
3749         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3750                 vi_srbm_select(adev, 0, 0, 0, i);
3751                 /* CP and shaders */
3752                 if (i == 0) {
3753                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3754                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3755                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3756                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3757                         WREG32(mmSH_MEM_CONFIG, tmp);
3758                         WREG32(mmSH_MEM_BASES, 0);
3759                 } else {
3760                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3761                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3762                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3763                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3764                         WREG32(mmSH_MEM_CONFIG, tmp);
3765                         tmp = adev->gmc.shared_aperture_start >> 48;
3766                         WREG32(mmSH_MEM_BASES, tmp);
3767                 }
3768
3769                 WREG32(mmSH_MEM_APE1_BASE, 1);
3770                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3771         }
3772         vi_srbm_select(adev, 0, 0, 0, 0);
3773         mutex_unlock(&adev->srbm_mutex);
3774
3775         gfx_v8_0_init_compute_vmid(adev);
3776
3777         mutex_lock(&adev->grbm_idx_mutex);
3778         /*
3779          * making sure that the following register writes will be broadcasted
3780          * to all the shaders
3781          */
3782         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3783
3784         WREG32(mmPA_SC_FIFO_SIZE,
3785                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3786                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3787                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3788                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3789                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3790                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3791                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3792                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3793
3794         tmp = RREG32(mmSPI_ARB_PRIORITY);
3795         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3796         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3797         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3798         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3799         WREG32(mmSPI_ARB_PRIORITY, tmp);
3800
3801         mutex_unlock(&adev->grbm_idx_mutex);
3802
3803 }
3804
3805 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3806 {
3807         u32 i, j, k;
3808         u32 mask;
3809
3810         mutex_lock(&adev->grbm_idx_mutex);
3811         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3812                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3813                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3814                         for (k = 0; k < adev->usec_timeout; k++) {
3815                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3816                                         break;
3817                                 udelay(1);
3818                         }
3819                         if (k == adev->usec_timeout) {
3820                                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3821                                                       0xffffffff, 0xffffffff);
3822                                 mutex_unlock(&adev->grbm_idx_mutex);
3823                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3824                                          i, j);
3825                                 return;
3826                         }
3827                 }
3828         }
3829         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3830         mutex_unlock(&adev->grbm_idx_mutex);
3831
3832         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3833                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3834                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3835                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3836         for (k = 0; k < adev->usec_timeout; k++) {
3837                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3838                         break;
3839                 udelay(1);
3840         }
3841 }
3842
3843 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3844                                                bool enable)
3845 {
3846         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3847
3848         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3849         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3850         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3851         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3852
3853         WREG32(mmCP_INT_CNTL_RING0, tmp);
3854 }
3855
3856 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3857 {
3858         /* csib */
3859         WREG32(mmRLC_CSIB_ADDR_HI,
3860                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3861         WREG32(mmRLC_CSIB_ADDR_LO,
3862                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3863         WREG32(mmRLC_CSIB_LENGTH,
3864                         adev->gfx.rlc.clear_state_size);
3865 }
3866
/*
 * Walk the indirect part of a register list and rewrite it in place.
 *
 * The list is scanned from @ind_offset up to @list_size (both in dwords).
 * A dword equal to 0xFFFFFFFF ends the current group; the offset of the
 * first dword of every group is appended to @ind_start_offsets.  Every
 * other entry is three dwords long; its third dword is looked up in
 * @unique_indices (appended there if not yet present) and replaced by its
 * position in that table.
 *
 * @register_list_format: list to parse; modified in place
 * @ind_offset:           dword offset at which parsing starts
 * @list_size:            number of dwords in @register_list_format
 * @unique_indices:       table of distinct third-dword values (in/out)
 * @indices_count:        number of used entries in @unique_indices (in/out)
 * @max_indices:          capacity of @unique_indices
 * @ind_start_offsets:    table of group start offsets (in/out)
 * @offset_count:         number of used entries in @ind_start_offsets (in/out)
 * @max_offset:           capacity of @ind_start_offsets
 *
 * The counts are checked with BUG_ON() right after being incremented, so
 * each table can hold at most capacity - 1 entries before the kernel BUGs.
 * A trailing entry that is cut short by @list_size is left untouched
 * instead of being accessed past the end of the list.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* group terminator: the next dword starts a new group */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip to the third dword of the entry */
		ind_offset += 2;

		/* truncated entry: do not read or write beyond the list */
		if (ind_offset >= list_size)
			break;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append it to the table */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value by its table position */
		register_list_format[ind_offset] = indices;
	}
}
3916
3917 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3918 {
3919         int i, temp, data;
3920         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3921         int indices_count = 0;
3922         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3923         int offset_count = 0;
3924
3925         int list_size;
3926         unsigned int *register_list_format =
3927                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3928         if (!register_list_format)
3929                 return -ENOMEM;
3930         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3931                         adev->gfx.rlc.reg_list_format_size_bytes);
3932
3933         gfx_v8_0_parse_ind_reg_list(register_list_format,
3934                                 RLC_FormatDirectRegListLength,
3935                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3936                                 unique_indices,
3937                                 &indices_count,
3938                                 ARRAY_SIZE(unique_indices),
3939                                 indirect_start_offsets,
3940                                 &offset_count,
3941                                 ARRAY_SIZE(indirect_start_offsets));
3942
3943         /* save and restore list */
3944         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3945
3946         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3947         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3948                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3949
3950         /* indirect list */
3951         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3952         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3953                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3954
3955         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3956         list_size = list_size >> 1;
3957         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3958         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3959
3960         /* starting offsets starts */
3961         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3962                 adev->gfx.rlc.starting_offsets_start);
3963         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3964                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3965                                 indirect_start_offsets[i]);
3966
3967         /* unique indices */
3968         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3969         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3970         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3971                 if (unique_indices[i] != 0) {
3972                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3973                         WREG32(data + i, unique_indices[i] >> 20);
3974                 }
3975         }
3976         kfree(register_list_format);
3977
3978         return 0;
3979 }
3980
3981 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3982 {
3983         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3984 }
3985
3986 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3987 {
3988         uint32_t data;
3989
3990         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3991
3992         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3993         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3994         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3995         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3996         WREG32(mmRLC_PG_DELAY, data);
3997
3998         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3999         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4000
4001 }
4002
4003 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4004                                                 bool enable)
4005 {
4006         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4007 }
4008
4009 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4010                                                   bool enable)
4011 {
4012         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4013 }
4014
4015 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4016 {
4017         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4018 }
4019
4020 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4021 {
4022         if ((adev->asic_type == CHIP_CARRIZO) ||
4023             (adev->asic_type == CHIP_STONEY)) {
4024                 gfx_v8_0_init_csb(adev);
4025                 gfx_v8_0_init_save_restore_list(adev);
4026                 gfx_v8_0_enable_save_restore_machine(adev);
4027                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4028                 gfx_v8_0_init_power_gating(adev);
4029                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4030         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4031                    (adev->asic_type == CHIP_POLARIS12) ||
4032                    (adev->asic_type == CHIP_VEGAM)) {
4033                 gfx_v8_0_init_csb(adev);
4034                 gfx_v8_0_init_save_restore_list(adev);
4035                 gfx_v8_0_enable_save_restore_machine(adev);
4036                 gfx_v8_0_init_power_gating(adev);
4037         }
4038
4039 }
4040
4041 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4042 {
4043         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4044
4045         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4046         gfx_v8_0_wait_for_rlc_serdes(adev);
4047 }
4048
4049 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4050 {
4051         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4052         udelay(50);
4053
4054         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4055         udelay(50);
4056 }
4057
4058 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4059 {
4060         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4061
4062         /* carrizo do enable cp interrupt after cp inited */
4063         if (!(adev->flags & AMD_IS_APU))
4064                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4065
4066         udelay(50);
4067 }
4068
4069 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4070 {
4071         if (amdgpu_sriov_vf(adev)) {
4072                 gfx_v8_0_init_csb(adev);
4073                 return 0;
4074         }
4075
4076         adev->gfx.rlc.funcs->stop(adev);
4077         adev->gfx.rlc.funcs->reset(adev);
4078         gfx_v8_0_init_pg(adev);
4079         adev->gfx.rlc.funcs->start(adev);
4080
4081         return 0;
4082 }
4083
4084 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4085 {
4086         int i;
4087         u32 tmp = RREG32(mmCP_ME_CNTL);
4088
4089         if (enable) {
4090                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4091                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4092                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4093         } else {
4094                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4095                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4096                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4097                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4098                         adev->gfx.gfx_ring[i].sched.ready = false;
4099         }
4100         WREG32(mmCP_ME_CNTL, tmp);
4101         udelay(50);
4102 }
4103
4104 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4105 {
4106         u32 count = 0;
4107         const struct cs_section_def *sect = NULL;
4108         const struct cs_extent_def *ext = NULL;
4109
4110         /* begin clear state */
4111         count += 2;
4112         /* context control state */
4113         count += 3;
4114
4115         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4116                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4117                         if (sect->id == SECT_CONTEXT)
4118                                 count += 2 + ext->reg_count;
4119                         else
4120                                 return 0;
4121                 }
4122         }
4123         /* pa_sc_raster_config/pa_sc_raster_config1 */
4124         count += 4;
4125         /* end clear state */
4126         count += 2;
4127         /* clear state */
4128         count += 2;
4129
4130         return count;
4131 }
4132
4133 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4134 {
4135         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4136         const struct cs_section_def *sect = NULL;
4137         const struct cs_extent_def *ext = NULL;
4138         int r, i;
4139
4140         /* init the CP */
4141         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4142         WREG32(mmCP_ENDIAN_SWAP, 0);
4143         WREG32(mmCP_DEVICE_ID, 1);
4144
4145         gfx_v8_0_cp_gfx_enable(adev, true);
4146
4147         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4148         if (r) {
4149                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4150                 return r;
4151         }
4152
4153         /* clear state buffer */
4154         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4155         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4156
4157         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4158         amdgpu_ring_write(ring, 0x80000000);
4159         amdgpu_ring_write(ring, 0x80000000);
4160
4161         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4162                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4163                         if (sect->id == SECT_CONTEXT) {
4164                                 amdgpu_ring_write(ring,
4165                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4166                                                ext->reg_count));
4167                                 amdgpu_ring_write(ring,
4168                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4169                                 for (i = 0; i < ext->reg_count; i++)
4170                                         amdgpu_ring_write(ring, ext->extent[i]);
4171                         }
4172                 }
4173         }
4174
4175         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4176         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4177         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4178         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4179
4180         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4181         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4182
4183         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4184         amdgpu_ring_write(ring, 0);
4185
4186         /* init the CE partitions */
4187         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4188         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4189         amdgpu_ring_write(ring, 0x8000);
4190         amdgpu_ring_write(ring, 0x8000);
4191
4192         amdgpu_ring_commit(ring);
4193
4194         return 0;
4195 }
4196 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4197 {
4198         u32 tmp;
4199         /* no gfx doorbells on iceland */
4200         if (adev->asic_type == CHIP_TOPAZ)
4201                 return;
4202
4203         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4204
4205         if (ring->use_doorbell) {
4206                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4207                                 DOORBELL_OFFSET, ring->doorbell_index);
4208                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4209                                                 DOORBELL_HIT, 0);
4210                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4211                                             DOORBELL_EN, 1);
4212         } else {
4213                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4214         }
4215
4216         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4217
4218         if (adev->flags & AMD_IS_APU)
4219                 return;
4220
4221         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4222                                         DOORBELL_RANGE_LOWER,
4223                                         adev->doorbell_index.gfx_ring0);
4224         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4225
4226         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4227                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4228 }
4229
4230 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4231 {
4232         struct amdgpu_ring *ring;
4233         u32 tmp;
4234         u32 rb_bufsz;
4235         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4236         int r;
4237
4238         /* Set the write pointer delay */
4239         WREG32(mmCP_RB_WPTR_DELAY, 0);
4240
4241         /* set the RB to use vmid 0 */
4242         WREG32(mmCP_RB_VMID, 0);
4243
4244         /* Set ring buffer size */
4245         ring = &adev->gfx.gfx_ring[0];
4246         rb_bufsz = order_base_2(ring->ring_size / 8);
4247         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4248         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4249         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4250         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4251 #ifdef __BIG_ENDIAN
4252         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4253 #endif
4254         WREG32(mmCP_RB0_CNTL, tmp);
4255
4256         /* Initialize the ring buffer's read and write pointers */
4257         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4258         ring->wptr = 0;
4259         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4260
4261         /* set the wb address wether it's enabled or not */
4262         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4263         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4264         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4265
4266         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4267         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4268         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4269         mdelay(1);
4270         WREG32(mmCP_RB0_CNTL, tmp);
4271
4272         rb_addr = ring->gpu_addr >> 8;
4273         WREG32(mmCP_RB0_BASE, rb_addr);
4274         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4275
4276         gfx_v8_0_set_cpg_door_bell(adev, ring);
4277         /* start the ring */
4278         amdgpu_ring_clear_ring(ring);
4279         gfx_v8_0_cp_gfx_start(adev);
4280         ring->sched.ready = true;
4281         r = amdgpu_ring_test_helper(ring);
4282
4283         return r;
4284 }
4285
4286 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4287 {
4288         int i;
4289
4290         if (enable) {
4291                 WREG32(mmCP_MEC_CNTL, 0);
4292         } else {
4293                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4294                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4295                         adev->gfx.compute_ring[i].sched.ready = false;
4296                 adev->gfx.kiq.ring.sched.ready = false;
4297         }
4298         udelay(50);
4299 }
4300
4301 /* KIQ functions */
4302 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4303 {
4304         uint32_t tmp;
4305         struct amdgpu_device *adev = ring->adev;
4306
4307         /* tell RLC which is KIQ queue */
4308         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4309         tmp &= 0xffffff00;
4310         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4311         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4312         tmp |= 0x80;
4313         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4314 }
4315
4316 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4317 {
4318         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4319         uint64_t queue_mask = 0;
4320         int r, i;
4321
4322         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4323                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4324                         continue;
4325
4326                 /* This situation may be hit in the future if a new HW
4327                  * generation exposes more than 64 queues. If so, the
4328                  * definition of queue_mask needs updating */
4329                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4330                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4331                         break;
4332                 }
4333
4334                 queue_mask |= (1ull << i);
4335         }
4336
4337         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4338         if (r) {
4339                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4340                 return r;
4341         }
4342         /* set resources */
4343         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4344         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4345         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4346         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4347         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4348         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4349         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4350         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4351         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4352                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4353                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4354                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4355
4356                 /* map queues */
4357                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4358                 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4359                 amdgpu_ring_write(kiq_ring,
4360                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4361                 amdgpu_ring_write(kiq_ring,
4362                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4363                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4364                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4365                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4366                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4367                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4368                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4369                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4370         }
4371
4372         r = amdgpu_ring_test_helper(kiq_ring);
4373         if (r)
4374                 DRM_ERROR("KCQ enable failed\n");
4375         return r;
4376 }
4377
4378 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4379 {
4380         int i, r = 0;
4381
4382         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4383                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4384                 for (i = 0; i < adev->usec_timeout; i++) {
4385                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4386                                 break;
4387                         udelay(1);
4388                 }
4389                 if (i == adev->usec_timeout)
4390                         r = -ETIMEDOUT;
4391         }
4392         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4393         WREG32(mmCP_HQD_PQ_RPTR, 0);
4394         WREG32(mmCP_HQD_PQ_WPTR, 0);
4395
4396         return r;
4397 }
4398
4399 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4400 {
4401         struct amdgpu_device *adev = ring->adev;
4402         struct vi_mqd *mqd = ring->mqd_ptr;
4403         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4404         uint32_t tmp;
4405
4406         mqd->header = 0xC0310800;
4407         mqd->compute_pipelinestat_enable = 0x00000001;
4408         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4409         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4410         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4411         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4412         mqd->compute_misc_reserved = 0x00000003;
4413         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4414                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4415         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4416                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4417         eop_base_addr = ring->eop_gpu_addr >> 8;
4418         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4419         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4420
4421         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4422         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4423         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4424                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4425
4426         mqd->cp_hqd_eop_control = tmp;
4427
4428         /* enable doorbell? */
4429         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4430                             CP_HQD_PQ_DOORBELL_CONTROL,
4431                             DOORBELL_EN,
4432                             ring->use_doorbell ? 1 : 0);
4433
4434         mqd->cp_hqd_pq_doorbell_control = tmp;
4435
4436         /* set the pointer to the MQD */
4437         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4438         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4439
4440         /* set MQD vmid to 0 */
4441         tmp = RREG32(mmCP_MQD_CONTROL);
4442         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4443         mqd->cp_mqd_control = tmp;
4444
4445         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4446         hqd_gpu_addr = ring->gpu_addr >> 8;
4447         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4448         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4449
4450         /* set up the HQD, this is similar to CP_RB0_CNTL */
4451         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4452         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4453                             (order_base_2(ring->ring_size / 4) - 1));
4454         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4455                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4456 #ifdef __BIG_ENDIAN
4457         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4458 #endif
4459         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4460         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4461         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4462         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4463         mqd->cp_hqd_pq_control = tmp;
4464
4465         /* set the wb address whether it's enabled or not */
4466         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4467         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4468         mqd->cp_hqd_pq_rptr_report_addr_hi =
4469                 upper_32_bits(wb_gpu_addr) & 0xffff;
4470
4471         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4472         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4473         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4474         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4475
4476         tmp = 0;
4477         /* enable the doorbell if requested */
4478         if (ring->use_doorbell) {
4479                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4480                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4481                                 DOORBELL_OFFSET, ring->doorbell_index);
4482
4483                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4484                                          DOORBELL_EN, 1);
4485                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4486                                          DOORBELL_SOURCE, 0);
4487                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4488                                          DOORBELL_HIT, 0);
4489         }
4490
4491         mqd->cp_hqd_pq_doorbell_control = tmp;
4492
4493         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4494         ring->wptr = 0;
4495         mqd->cp_hqd_pq_wptr = ring->wptr;
4496         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4497
4498         /* set the vmid for the queue */
4499         mqd->cp_hqd_vmid = 0;
4500
4501         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4502         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4503         mqd->cp_hqd_persistent_state = tmp;
4504
4505         /* set MTYPE */
4506         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4507         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4508         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4509         mqd->cp_hqd_ib_control = tmp;
4510
4511         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4512         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4513         mqd->cp_hqd_iq_timer = tmp;
4514
4515         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4516         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4517         mqd->cp_hqd_ctx_save_control = tmp;
4518
4519         /* defaults */
4520         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4521         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4522         mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4523         mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4524         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4525         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4526         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4527         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4528         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4529         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4530         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4531         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4532         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4533         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4534         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4535
4536         /* activate the queue */
4537         mqd->cp_hqd_active = 1;
4538
4539         return 0;
4540 }
4541
4542 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4543                         struct vi_mqd *mqd)
4544 {
4545         uint32_t mqd_reg;
4546         uint32_t *mqd_data;
4547
4548         /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4549         mqd_data = &mqd->cp_mqd_base_addr_lo;
4550
4551         /* disable wptr polling */
4552         WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4553
4554         /* program all HQD registers */
4555         for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4556                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4557
4558         /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4559          * This is safe since EOP RPTR==WPTR for any inactive HQD
4560          * on ASICs that do not support context-save.
4561          * EOP writes/reads can start anywhere in the ring.
4562          */
4563         if (adev->asic_type != CHIP_TONGA) {
4564                 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4565                 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4566                 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4567         }
4568
4569         for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4570                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4571
4572         /* activate the HQD */
4573         for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4574                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4575
4576         return 0;
4577 }
4578
4579 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4580 {
4581         struct amdgpu_device *adev = ring->adev;
4582         struct vi_mqd *mqd = ring->mqd_ptr;
4583         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4584
4585         gfx_v8_0_kiq_setting(ring);
4586
4587         if (adev->in_gpu_reset) { /* for GPU_RESET case */
4588                 /* reset MQD to a clean status */
4589                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4590                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4591
4592                 /* reset ring buffer */
4593                 ring->wptr = 0;
4594                 amdgpu_ring_clear_ring(ring);
4595                 mutex_lock(&adev->srbm_mutex);
4596                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4597                 gfx_v8_0_mqd_commit(adev, mqd);
4598                 vi_srbm_select(adev, 0, 0, 0, 0);
4599                 mutex_unlock(&adev->srbm_mutex);
4600         } else {
4601                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4602                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4603                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4604                 mutex_lock(&adev->srbm_mutex);
4605                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4606                 gfx_v8_0_mqd_init(ring);
4607                 gfx_v8_0_mqd_commit(adev, mqd);
4608                 vi_srbm_select(adev, 0, 0, 0, 0);
4609                 mutex_unlock(&adev->srbm_mutex);
4610
4611                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4612                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4613         }
4614
4615         return 0;
4616 }
4617
4618 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4619 {
4620         struct amdgpu_device *adev = ring->adev;
4621         struct vi_mqd *mqd = ring->mqd_ptr;
4622         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4623
4624         if (!adev->in_gpu_reset && !adev->in_suspend) {
4625                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4626                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4627                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4628                 mutex_lock(&adev->srbm_mutex);
4629                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4630                 gfx_v8_0_mqd_init(ring);
4631                 vi_srbm_select(adev, 0, 0, 0, 0);
4632                 mutex_unlock(&adev->srbm_mutex);
4633
4634                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4635                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4636         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4637                 /* reset MQD to a clean status */
4638                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4639                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4640                 /* reset ring buffer */
4641                 ring->wptr = 0;
4642                 amdgpu_ring_clear_ring(ring);
4643         } else {
4644                 amdgpu_ring_clear_ring(ring);
4645         }
4646         return 0;
4647 }
4648
4649 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4650 {
4651         if (adev->asic_type > CHIP_TONGA) {
4652                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4653                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4654         }
4655         /* enable doorbells */
4656         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4657 }
4658
4659 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4660 {
4661         struct amdgpu_ring *ring;
4662         int r;
4663
4664         ring = &adev->gfx.kiq.ring;
4665
4666         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4667         if (unlikely(r != 0))
4668                 return r;
4669
4670         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4671         if (unlikely(r != 0))
4672                 return r;
4673
4674         gfx_v8_0_kiq_init_queue(ring);
4675         amdgpu_bo_kunmap(ring->mqd_obj);
4676         ring->mqd_ptr = NULL;
4677         amdgpu_bo_unreserve(ring->mqd_obj);
4678         ring->sched.ready = true;
4679         return 0;
4680 }
4681
4682 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4683 {
4684         struct amdgpu_ring *ring = NULL;
4685         int r = 0, i;
4686
4687         gfx_v8_0_cp_compute_enable(adev, true);
4688
4689         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4690                 ring = &adev->gfx.compute_ring[i];
4691
4692                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4693                 if (unlikely(r != 0))
4694                         goto done;
4695                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4696                 if (!r) {
4697                         r = gfx_v8_0_kcq_init_queue(ring);
4698                         amdgpu_bo_kunmap(ring->mqd_obj);
4699                         ring->mqd_ptr = NULL;
4700                 }
4701                 amdgpu_bo_unreserve(ring->mqd_obj);
4702                 if (r)
4703                         goto done;
4704         }
4705
4706         gfx_v8_0_set_mec_doorbell_range(adev);
4707
4708         r = gfx_v8_0_kiq_kcq_enable(adev);
4709         if (r)
4710                 goto done;
4711
4712         /* Test KCQs - reversing the order of rings seems to fix ring test failure
4713          * after GPU reset
4714          */
4715         for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) {
4716                 ring = &adev->gfx.compute_ring[i];
4717                 r = amdgpu_ring_test_helper(ring);
4718         }
4719
4720 done:
4721         return r;
4722 }
4723
4724 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4725 {
4726         int r;
4727
4728         if (!(adev->flags & AMD_IS_APU))
4729                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4730
4731         r = gfx_v8_0_kiq_resume(adev);
4732         if (r)
4733                 return r;
4734
4735         r = gfx_v8_0_cp_gfx_resume(adev);
4736         if (r)
4737                 return r;
4738
4739         r = gfx_v8_0_kcq_resume(adev);
4740         if (r)
4741                 return r;
4742         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4743
4744         return 0;
4745 }
4746
4747 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4748 {
4749         gfx_v8_0_cp_gfx_enable(adev, enable);
4750         gfx_v8_0_cp_compute_enable(adev, enable);
4751 }
4752
4753 static int gfx_v8_0_hw_init(void *handle)
4754 {
4755         int r;
4756         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4757
4758         gfx_v8_0_init_golden_registers(adev);
4759         gfx_v8_0_constants_init(adev);
4760
4761         r = adev->gfx.rlc.funcs->resume(adev);
4762         if (r)
4763                 return r;
4764
4765         r = gfx_v8_0_cp_resume(adev);
4766
4767         return r;
4768 }
4769
4770 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4771 {
4772         int r, i;
4773         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4774
4775         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4776         if (r)
4777                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4778
4779         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4780                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4781
4782                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4783                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4784                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4785                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4786                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4787                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4788                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4789                 amdgpu_ring_write(kiq_ring, 0);
4790                 amdgpu_ring_write(kiq_ring, 0);
4791                 amdgpu_ring_write(kiq_ring, 0);
4792         }
4793         r = amdgpu_ring_test_helper(kiq_ring);
4794         if (r)
4795                 DRM_ERROR("KCQ disable failed\n");
4796
4797         return r;
4798 }
4799
4800 static bool gfx_v8_0_is_idle(void *handle)
4801 {
4802         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4803
4804         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4805                 || RREG32(mmGRBM_STATUS2) != 0x8)
4806                 return false;
4807         else
4808                 return true;
4809 }
4810
4811 static bool gfx_v8_0_rlc_is_idle(void *handle)
4812 {
4813         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4814
4815         if (RREG32(mmGRBM_STATUS2) != 0x8)
4816                 return false;
4817         else
4818                 return true;
4819 }
4820
4821 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4822 {
4823         unsigned int i;
4824         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4825
4826         for (i = 0; i < adev->usec_timeout; i++) {
4827                 if (gfx_v8_0_rlc_is_idle(handle))
4828                         return 0;
4829
4830                 udelay(1);
4831         }
4832         return -ETIMEDOUT;
4833 }
4834
4835 static int gfx_v8_0_wait_for_idle(void *handle)
4836 {
4837         unsigned int i;
4838         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4839
4840         for (i = 0; i < adev->usec_timeout; i++) {
4841                 if (gfx_v8_0_is_idle(handle))
4842                         return 0;
4843
4844                 udelay(1);
4845         }
4846         return -ETIMEDOUT;
4847 }
4848
4849 static int gfx_v8_0_hw_fini(void *handle)
4850 {
4851         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4852
4853         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4854         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4855
4856         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4857
4858         amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4859
4860         /* disable KCQ to avoid CPC touch memory not valid anymore */
4861         gfx_v8_0_kcq_disable(adev);
4862
4863         if (amdgpu_sriov_vf(adev)) {
4864                 pr_debug("For SRIOV client, shouldn't do anything.\n");
4865                 return 0;
4866         }
4867         amdgpu_gfx_rlc_enter_safe_mode(adev);
4868         if (!gfx_v8_0_wait_for_idle(adev))
4869                 gfx_v8_0_cp_enable(adev, false);
4870         else
4871                 pr_err("cp is busy, skip halt cp\n");
4872         if (!gfx_v8_0_wait_for_rlc_idle(adev))
4873                 adev->gfx.rlc.funcs->stop(adev);
4874         else
4875                 pr_err("rlc is busy, skip halt rlc\n");
4876         amdgpu_gfx_rlc_exit_safe_mode(adev);
4877         return 0;
4878 }
4879
/* Suspend is implemented as a plain hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4884
/* Resume is implemented as a plain hardware init. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4889
4890 static bool gfx_v8_0_check_soft_reset(void *handle)
4891 {
4892         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4893         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4894         u32 tmp;
4895
4896         /* GRBM_STATUS */
4897         tmp = RREG32(mmGRBM_STATUS);
4898         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4899                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4900                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4901                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4902                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4903                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4904                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4905                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4906                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4907                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4908                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4909                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4910                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4911         }
4912
4913         /* GRBM_STATUS2 */
4914         tmp = RREG32(mmGRBM_STATUS2);
4915         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4916                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4917                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4918
4919         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4920             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4921             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4922                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4923                                                 SOFT_RESET_CPF, 1);
4924                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4925                                                 SOFT_RESET_CPC, 1);
4926                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4927                                                 SOFT_RESET_CPG, 1);
4928                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4929                                                 SOFT_RESET_GRBM, 1);
4930         }
4931
4932         /* SRBM_STATUS */
4933         tmp = RREG32(mmSRBM_STATUS);
4934         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4935                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4936                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4937         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4938                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4939                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4940
4941         if (grbm_soft_reset || srbm_soft_reset) {
4942                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
4943                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
4944                 return true;
4945         } else {
4946                 adev->gfx.grbm_soft_reset = 0;
4947                 adev->gfx.srbm_soft_reset = 0;
4948                 return false;
4949         }
4950 }
4951
4952 static int gfx_v8_0_pre_soft_reset(void *handle)
4953 {
4954         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4955         u32 grbm_soft_reset = 0;
4956
4957         if ((!adev->gfx.grbm_soft_reset) &&
4958             (!adev->gfx.srbm_soft_reset))
4959                 return 0;
4960
4961         grbm_soft_reset = adev->gfx.grbm_soft_reset;
4962
4963         /* stop the rlc */
4964         adev->gfx.rlc.funcs->stop(adev);
4965
4966         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4967             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4968                 /* Disable GFX parsing/prefetching */
4969                 gfx_v8_0_cp_gfx_enable(adev, false);
4970
4971         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4972             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
4973             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
4974             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
4975                 int i;
4976
4977                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4978                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4979
4980                         mutex_lock(&adev->srbm_mutex);
4981                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4982                         gfx_v8_0_deactivate_hqd(adev, 2);
4983                         vi_srbm_select(adev, 0, 0, 0, 0);
4984                         mutex_unlock(&adev->srbm_mutex);
4985                 }
4986                 /* Disable MEC parsing/prefetching */
4987                 gfx_v8_0_cp_compute_enable(adev, false);
4988         }
4989
4990        return 0;
4991 }
4992
4993 static int gfx_v8_0_soft_reset(void *handle)
4994 {
4995         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4996         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4997         u32 tmp;
4998
4999         if ((!adev->gfx.grbm_soft_reset) &&
5000             (!adev->gfx.srbm_soft_reset))
5001                 return 0;
5002
5003         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5004         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5005
5006         if (grbm_soft_reset || srbm_soft_reset) {
5007                 tmp = RREG32(mmGMCON_DEBUG);
5008                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5009                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5010                 WREG32(mmGMCON_DEBUG, tmp);
5011                 udelay(50);
5012         }
5013
5014         if (grbm_soft_reset) {
5015                 tmp = RREG32(mmGRBM_SOFT_RESET);
5016                 tmp |= grbm_soft_reset;
5017                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5018                 WREG32(mmGRBM_SOFT_RESET, tmp);
5019                 tmp = RREG32(mmGRBM_SOFT_RESET);
5020
5021                 udelay(50);
5022
5023                 tmp &= ~grbm_soft_reset;
5024                 WREG32(mmGRBM_SOFT_RESET, tmp);
5025                 tmp = RREG32(mmGRBM_SOFT_RESET);
5026         }
5027
5028         if (srbm_soft_reset) {
5029                 tmp = RREG32(mmSRBM_SOFT_RESET);
5030                 tmp |= srbm_soft_reset;
5031                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5032                 WREG32(mmSRBM_SOFT_RESET, tmp);
5033                 tmp = RREG32(mmSRBM_SOFT_RESET);
5034
5035                 udelay(50);
5036
5037                 tmp &= ~srbm_soft_reset;
5038                 WREG32(mmSRBM_SOFT_RESET, tmp);
5039                 tmp = RREG32(mmSRBM_SOFT_RESET);
5040         }
5041
5042         if (grbm_soft_reset || srbm_soft_reset) {
5043                 tmp = RREG32(mmGMCON_DEBUG);
5044                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5045                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5046                 WREG32(mmGMCON_DEBUG, tmp);
5047         }
5048
5049         /* Wait a little for things to settle down */
5050         udelay(50);
5051
5052         return 0;
5053 }
5054
5055 static int gfx_v8_0_post_soft_reset(void *handle)
5056 {
5057         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5058         u32 grbm_soft_reset = 0;
5059
5060         if ((!adev->gfx.grbm_soft_reset) &&
5061             (!adev->gfx.srbm_soft_reset))
5062                 return 0;
5063
5064         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5065
5066         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5067             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5068             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5069             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5070                 int i;
5071
5072                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5073                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5074
5075                         mutex_lock(&adev->srbm_mutex);
5076                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5077                         gfx_v8_0_deactivate_hqd(adev, 2);
5078                         vi_srbm_select(adev, 0, 0, 0, 0);
5079                         mutex_unlock(&adev->srbm_mutex);
5080                 }
5081                 gfx_v8_0_kiq_resume(adev);
5082                 gfx_v8_0_kcq_resume(adev);
5083         }
5084
5085         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5086             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5087                 gfx_v8_0_cp_gfx_resume(adev);
5088
5089         adev->gfx.rlc.funcs->start(adev);
5090
5091         return 0;
5092 }
5093
5094 /**
5095  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5096  *
5097  * @adev: amdgpu_device pointer
5098  *
5099  * Fetches a GPU clock counter snapshot.
5100  * Returns the 64 bit clock counter snapshot.
5101  */
5102 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5103 {
5104         uint64_t clock;
5105
5106         mutex_lock(&adev->gfx.gpu_clock_mutex);
5107         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5108         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5109                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5110         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5111         return clock;
5112 }
5113
5114 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5115                                           uint32_t vmid,
5116                                           uint32_t gds_base, uint32_t gds_size,
5117                                           uint32_t gws_base, uint32_t gws_size,
5118                                           uint32_t oa_base, uint32_t oa_size)
5119 {
5120         /* GDS Base */
5121         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5122         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5123                                 WRITE_DATA_DST_SEL(0)));
5124         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5125         amdgpu_ring_write(ring, 0);
5126         amdgpu_ring_write(ring, gds_base);
5127
5128         /* GDS Size */
5129         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5130         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5131                                 WRITE_DATA_DST_SEL(0)));
5132         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5133         amdgpu_ring_write(ring, 0);
5134         amdgpu_ring_write(ring, gds_size);
5135
5136         /* GWS */
5137         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5138         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5139                                 WRITE_DATA_DST_SEL(0)));
5140         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5141         amdgpu_ring_write(ring, 0);
5142         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5143
5144         /* OA */
5145         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5146         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5147                                 WRITE_DATA_DST_SEL(0)));
5148         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5149         amdgpu_ring_write(ring, 0);
5150         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5151 }
5152
5153 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5154 {
5155         WREG32(mmSQ_IND_INDEX,
5156                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5157                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5158                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5159                 (SQ_IND_INDEX__FORCE_READ_MASK));
5160         return RREG32(mmSQ_IND_DATA);
5161 }
5162
5163 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5164                            uint32_t wave, uint32_t thread,
5165                            uint32_t regno, uint32_t num, uint32_t *out)
5166 {
5167         WREG32(mmSQ_IND_INDEX,
5168                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5169                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5170                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5171                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5172                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5173                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5174         while (num--)
5175                 *(out++) = RREG32(mmSQ_IND_DATA);
5176 }
5177
5178 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5179 {
5180         /* type 0 wave data */
5181         dst[(*no_fields)++] = 0;
5182         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5183         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5184         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5185         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5186         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5187         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5188         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5189         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5190         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5191         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5192         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5193         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5194         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5195         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5196         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5197         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5198         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5199         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5200 }
5201
5202 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5203                                      uint32_t wave, uint32_t start,
5204                                      uint32_t size, uint32_t *dst)
5205 {
5206         wave_read_regs(
5207                 adev, simd, wave, 0,
5208                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5209 }
5210
5211
5212 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5213         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5214         .select_se_sh = &gfx_v8_0_select_se_sh,
5215         .read_wave_data = &gfx_v8_0_read_wave_data,
5216         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5217         .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5218 };
5219
5220 static int gfx_v8_0_early_init(void *handle)
5221 {
5222         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5223
5224         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5225         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5226         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5227         gfx_v8_0_set_ring_funcs(adev);
5228         gfx_v8_0_set_irq_funcs(adev);
5229         gfx_v8_0_set_gds_init(adev);
5230         gfx_v8_0_set_rlc_funcs(adev);
5231
5232         return 0;
5233 }
5234
5235 static int gfx_v8_0_late_init(void *handle)
5236 {
5237         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5238         int r;
5239
5240         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5241         if (r)
5242                 return r;
5243
5244         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5245         if (r)
5246                 return r;
5247
5248         /* requires IBs so do in late init after IB pool is initialized */
5249         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5250         if (r)
5251                 return r;
5252
5253         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5254         if (r) {
5255                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5256                 return r;
5257         }
5258
5259         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5260         if (r) {
5261                 DRM_ERROR(
5262                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5263                         r);
5264                 return r;
5265         }
5266
5267         return 0;
5268 }
5269
5270 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5271                                                        bool enable)
5272 {
5273         if (((adev->asic_type == CHIP_POLARIS11) ||
5274             (adev->asic_type == CHIP_POLARIS12) ||
5275             (adev->asic_type == CHIP_VEGAM)) &&
5276             adev->powerplay.pp_funcs->set_powergating_by_smu)
5277                 /* Send msg to SMU via Powerplay */
5278                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5279
5280         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5281 }
5282
/* Set RLC_PG_CNTL.DYN_PER_CU_PG_ENABLE to 1 when @enable is true, else 0. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5288
/* Set RLC_PG_CNTL.QUICK_PG_ENABLE to 1 when @enable is true, else 0. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5294
/* Set RLC_PG_CNTL.GFX_POWER_GATING_ENABLE to 1 when @enable is true, else 0. */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5300
/*
 * Set RLC_PG_CNTL.GFX_PIPELINE_PG_ENABLE to 1 when @enable is true,
 * else 0.  When disabling, a gfx register is read afterwards and its
 * value discarded.
 */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5310
5311 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5312                                           bool enable)
5313 {
5314         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5315                 cz_enable_gfx_cg_power_gating(adev, true);
5316                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5317                         cz_enable_gfx_pipeline_power_gating(adev, true);
5318         } else {
5319                 cz_enable_gfx_cg_power_gating(adev, false);
5320                 cz_enable_gfx_pipeline_power_gating(adev, false);
5321         }
5322 }
5323
5324 static int gfx_v8_0_set_powergating_state(void *handle,
5325                                           enum amd_powergating_state state)
5326 {
5327         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5328         bool enable = (state == AMD_PG_STATE_GATE);
5329
5330         if (amdgpu_sriov_vf(adev))
5331                 return 0;
5332
5333         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5334                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5335                                 AMD_PG_SUPPORT_CP |
5336                                 AMD_PG_SUPPORT_GFX_DMG))
5337                 amdgpu_gfx_rlc_enter_safe_mode(adev);
5338         switch (adev->asic_type) {
5339         case CHIP_CARRIZO:
5340         case CHIP_STONEY:
5341
5342                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5343                         cz_enable_sck_slow_down_on_power_up(adev, true);
5344                         cz_enable_sck_slow_down_on_power_down(adev, true);
5345                 } else {
5346                         cz_enable_sck_slow_down_on_power_up(adev, false);
5347                         cz_enable_sck_slow_down_on_power_down(adev, false);
5348                 }
5349                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5350                         cz_enable_cp_power_gating(adev, true);
5351                 else
5352                         cz_enable_cp_power_gating(adev, false);
5353
5354                 cz_update_gfx_cg_power_gating(adev, enable);
5355
5356                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5357                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5358                 else
5359                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5360
5361                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5362                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5363                 else
5364                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5365                 break;
5366         case CHIP_POLARIS11:
5367         case CHIP_POLARIS12:
5368         case CHIP_VEGAM:
5369                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5370                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5371                 else
5372                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5373
5374                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5375                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5376                 else
5377                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5378
5379                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5380                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5381                 else
5382                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5383                 break;
5384         default:
5385                 break;
5386         }
5387         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5388                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5389                                 AMD_PG_SUPPORT_CP |
5390                                 AMD_PG_SUPPORT_GFX_DMG))
5391                 amdgpu_gfx_rlc_exit_safe_mode(adev);
5392         return 0;
5393 }
5394
5395 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5396 {
5397         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5398         int data;
5399
5400         if (amdgpu_sriov_vf(adev))
5401                 *flags = 0;
5402
5403         /* AMD_CG_SUPPORT_GFX_MGCG */
5404         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5405         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5406                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5407
5408         /* AMD_CG_SUPPORT_GFX_CGLG */
5409         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5410         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5411                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5412
5413         /* AMD_CG_SUPPORT_GFX_CGLS */
5414         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5415                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5416
5417         /* AMD_CG_SUPPORT_GFX_CGTS */
5418         data = RREG32(mmCGTS_SM_CTRL_REG);
5419         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5420                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5421
5422         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5423         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5424                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5425
5426         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5427         data = RREG32(mmRLC_MEM_SLP_CNTL);
5428         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5429                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5430
5431         /* AMD_CG_SUPPORT_GFX_CP_LS */
5432         data = RREG32(mmCP_MEM_SLP_CNTL);
5433         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5434                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5435 }
5436
5437 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5438                                      uint32_t reg_addr, uint32_t cmd)
5439 {
5440         uint32_t data;
5441
5442         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5443
5444         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5445         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5446
5447         data = RREG32(mmRLC_SERDES_WR_CTRL);
5448         if (adev->asic_type == CHIP_STONEY)
5449                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5450                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5451                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5452                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5453                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5454                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5455                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5456                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5457                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5458         else
5459                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5460                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5461                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5462                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5463                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5464                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5465                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5466                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5467                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5468                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5469                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5470         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5471                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5472                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5473                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5474
5475         WREG32(mmRLC_SERDES_WR_CTRL, data);
5476 }
5477
/*
 * RLC safe-mode message codes (enter = 1, exit = 0) and the REQ/MESSAGE
 * field layout of RLC_GPR_REG2: REQ is bit 0, MESSAGE is bits 4:1.
 * NOTE(review): presumably the message codes are meant for the MESSAGE
 * field; no user of these macros is visible nearby -- confirm they are
 * still referenced.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5484
5485 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5486 {
5487         uint32_t rlc_setting;
5488
5489         rlc_setting = RREG32(mmRLC_CNTL);
5490         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5491                 return false;
5492
5493         return true;
5494 }
5495
5496 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5497 {
5498         uint32_t data;
5499         unsigned i;
5500         data = RREG32(mmRLC_CNTL);
5501         data |= RLC_SAFE_MODE__CMD_MASK;
5502         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5503         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5504         WREG32(mmRLC_SAFE_MODE, data);
5505
5506         /* wait for RLC_SAFE_MODE */
5507         for (i = 0; i < adev->usec_timeout; i++) {
5508                 if ((RREG32(mmRLC_GPM_STAT) &
5509                      (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5510                       RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5511                     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5512                      RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5513                         break;
5514                 udelay(1);
5515         }
5516         for (i = 0; i < adev->usec_timeout; i++) {
5517                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5518                         break;
5519                 udelay(1);
5520         }
5521 }
5522
5523 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5524 {
5525         uint32_t data;
5526         unsigned i;
5527
5528         data = RREG32(mmRLC_CNTL);
5529         data |= RLC_SAFE_MODE__CMD_MASK;
5530         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5531         WREG32(mmRLC_SAFE_MODE, data);
5532
5533         for (i = 0; i < adev->usec_timeout; i++) {
5534                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5535                         break;
5536                 udelay(1);
5537         }
5538 }
5539
/*
 * RLC callback table: plugs the gfx v8 implementations of the safe-mode,
 * clear-state-buffer and RLC life-cycle hooks into struct amdgpu_rlc_funcs.
 */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
	.set_safe_mode = gfx_v8_0_set_safe_mode,
	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
	.init = gfx_v8_0_rlc_init,
	.get_csb_size = gfx_v8_0_get_csb_size,
	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
	.resume = gfx_v8_0_rlc_resume,
	.stop = gfx_v8_0_rlc_stop,
	.reset = gfx_v8_0_rlc_reset,
	.start = gfx_v8_0_rlc_start
};
5553
5554 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5555                                                       bool enable)
5556 {
5557         uint32_t temp, data;
5558
5559         amdgpu_gfx_rlc_enter_safe_mode(adev);
5560
5561         /* It is disabled by HW by default */
5562         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5563                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5564                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5565                                 /* 1 - RLC memory Light sleep */
5566                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5567
5568                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5569                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5570                 }
5571
5572                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5573                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5574                 if (adev->flags & AMD_IS_APU)
5575                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5576                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5577                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5578                 else
5579                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5580                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5581                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5582                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5583
5584                 if (temp != data)
5585                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5586
5587                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5588                 gfx_v8_0_wait_for_rlc_serdes(adev);
5589
5590                 /* 5 - clear mgcg override */
5591                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5592
5593                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5594                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5595                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5596                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5597                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5598                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5599                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5600                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5601                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5602                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5603                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5604                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5605                         if (temp != data)
5606                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5607                 }
5608                 udelay(50);
5609
5610                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5611                 gfx_v8_0_wait_for_rlc_serdes(adev);
5612         } else {
5613                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5614                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5615                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5616                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5617                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5618                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5619                 if (temp != data)
5620                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5621
5622                 /* 2 - disable MGLS in RLC */
5623                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5624                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5625                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5626                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5627                 }
5628
5629                 /* 3 - disable MGLS in CP */
5630                 data = RREG32(mmCP_MEM_SLP_CNTL);
5631                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5632                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5633                         WREG32(mmCP_MEM_SLP_CNTL, data);
5634                 }
5635
5636                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5637                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5638                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5639                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5640                 if (temp != data)
5641                         WREG32(mmCGTS_SM_CTRL_REG, data);
5642
5643                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5644                 gfx_v8_0_wait_for_rlc_serdes(adev);
5645
5646                 /* 6 - set mgcg override */
5647                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5648
5649                 udelay(50);
5650
5651                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5652                 gfx_v8_0_wait_for_rlc_serdes(adev);
5653         }
5654
5655         amdgpu_gfx_rlc_exit_safe_mode(adev);
5656 }
5657
5658 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5659                                                       bool enable)
5660 {
5661         uint32_t temp, temp1, data, data1;
5662
5663         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5664
5665         amdgpu_gfx_rlc_enter_safe_mode(adev);
5666
5667         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5668                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5669                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5670                 if (temp1 != data1)
5671                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5672
5673                 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5674                 gfx_v8_0_wait_for_rlc_serdes(adev);
5675
5676                 /* 2 - clear cgcg override */
5677                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5678
5679                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5680                 gfx_v8_0_wait_for_rlc_serdes(adev);
5681
5682                 /* 3 - write cmd to set CGLS */
5683                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5684
5685                 /* 4 - enable cgcg */
5686                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5687
5688                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5689                         /* enable cgls*/
5690                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5691
5692                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5693                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5694
5695                         if (temp1 != data1)
5696                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5697                 } else {
5698                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5699                 }
5700
5701                 if (temp != data)
5702                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5703
5704                 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5705                  * Cmp_busy/GFX_Idle interrupts
5706                  */
5707                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5708         } else {
5709                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5710                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5711
5712                 /* TEST CGCG */
5713                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5714                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5715                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5716                 if (temp1 != data1)
5717                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5718
5719                 /* read gfx register to wake up cgcg */
5720                 RREG32(mmCB_CGTT_SCLK_CTRL);
5721                 RREG32(mmCB_CGTT_SCLK_CTRL);
5722                 RREG32(mmCB_CGTT_SCLK_CTRL);
5723                 RREG32(mmCB_CGTT_SCLK_CTRL);
5724
5725                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5726                 gfx_v8_0_wait_for_rlc_serdes(adev);
5727
5728                 /* write cmd to Set CGCG Overrride */
5729                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5730
5731                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5732                 gfx_v8_0_wait_for_rlc_serdes(adev);
5733
5734                 /* write cmd to Clear CGLS */
5735                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5736
5737                 /* disable cgcg, cgls should be disabled too. */
5738                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5739                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5740                 if (temp != data)
5741                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5742                 /* enable interrupts again for PG */
5743                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5744         }
5745
5746         gfx_v8_0_wait_for_rlc_serdes(adev);
5747
5748         amdgpu_gfx_rlc_exit_safe_mode(adev);
5749 }
5750 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5751                                             bool enable)
5752 {
5753         if (enable) {
5754                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5755                  * ===  MGCG + MGLS + TS(CG/LS) ===
5756                  */
5757                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5758                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5759         } else {
5760                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5761                  * ===  CGCG + CGLS ===
5762                  */
5763                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5764                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5765         }
5766         return 0;
5767 }
5768
5769 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5770                                           enum amd_clockgating_state state)
5771 {
5772         uint32_t msg_id, pp_state = 0;
5773         uint32_t pp_support_state = 0;
5774
5775         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5776                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5777                         pp_support_state = PP_STATE_SUPPORT_LS;
5778                         pp_state = PP_STATE_LS;
5779                 }
5780                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5781                         pp_support_state |= PP_STATE_SUPPORT_CG;
5782                         pp_state |= PP_STATE_CG;
5783                 }
5784                 if (state == AMD_CG_STATE_UNGATE)
5785                         pp_state = 0;
5786
5787                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5788                                 PP_BLOCK_GFX_CG,
5789                                 pp_support_state,
5790                                 pp_state);
5791                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5792                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5793         }
5794
5795         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5796                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5797                         pp_support_state = PP_STATE_SUPPORT_LS;
5798                         pp_state = PP_STATE_LS;
5799                 }
5800
5801                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5802                         pp_support_state |= PP_STATE_SUPPORT_CG;
5803                         pp_state |= PP_STATE_CG;
5804                 }
5805
5806                 if (state == AMD_CG_STATE_UNGATE)
5807                         pp_state = 0;
5808
5809                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5810                                 PP_BLOCK_GFX_MG,
5811                                 pp_support_state,
5812                                 pp_state);
5813                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5814                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5815         }
5816
5817         return 0;
5818 }
5819
5820 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5821                                           enum amd_clockgating_state state)
5822 {
5823
5824         uint32_t msg_id, pp_state = 0;
5825         uint32_t pp_support_state = 0;
5826
5827         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5828                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5829                         pp_support_state = PP_STATE_SUPPORT_LS;
5830                         pp_state = PP_STATE_LS;
5831                 }
5832                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5833                         pp_support_state |= PP_STATE_SUPPORT_CG;
5834                         pp_state |= PP_STATE_CG;
5835                 }
5836                 if (state == AMD_CG_STATE_UNGATE)
5837                         pp_state = 0;
5838
5839                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5840                                 PP_BLOCK_GFX_CG,
5841                                 pp_support_state,
5842                                 pp_state);
5843                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5844                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5845         }
5846
5847         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5848                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5849                         pp_support_state = PP_STATE_SUPPORT_LS;
5850                         pp_state = PP_STATE_LS;
5851                 }
5852                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5853                         pp_support_state |= PP_STATE_SUPPORT_CG;
5854                         pp_state |= PP_STATE_CG;
5855                 }
5856                 if (state == AMD_CG_STATE_UNGATE)
5857                         pp_state = 0;
5858
5859                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5860                                 PP_BLOCK_GFX_3D,
5861                                 pp_support_state,
5862                                 pp_state);
5863                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5864                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5865         }
5866
5867         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5868                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5869                         pp_support_state = PP_STATE_SUPPORT_LS;
5870                         pp_state = PP_STATE_LS;
5871                 }
5872
5873                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5874                         pp_support_state |= PP_STATE_SUPPORT_CG;
5875                         pp_state |= PP_STATE_CG;
5876                 }
5877
5878                 if (state == AMD_CG_STATE_UNGATE)
5879                         pp_state = 0;
5880
5881                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5882                                 PP_BLOCK_GFX_MG,
5883                                 pp_support_state,
5884                                 pp_state);
5885                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5886                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5887         }
5888
5889         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5890                 pp_support_state = PP_STATE_SUPPORT_LS;
5891
5892                 if (state == AMD_CG_STATE_UNGATE)
5893                         pp_state = 0;
5894                 else
5895                         pp_state = PP_STATE_LS;
5896
5897                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5898                                 PP_BLOCK_GFX_RLC,
5899                                 pp_support_state,
5900                                 pp_state);
5901                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5902                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5903         }
5904
5905         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5906                 pp_support_state = PP_STATE_SUPPORT_LS;
5907
5908                 if (state == AMD_CG_STATE_UNGATE)
5909                         pp_state = 0;
5910                 else
5911                         pp_state = PP_STATE_LS;
5912                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5913                         PP_BLOCK_GFX_CP,
5914                         pp_support_state,
5915                         pp_state);
5916                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5917                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5918         }
5919
5920         return 0;
5921 }
5922
5923 static int gfx_v8_0_set_clockgating_state(void *handle,
5924                                           enum amd_clockgating_state state)
5925 {
5926         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5927
5928         if (amdgpu_sriov_vf(adev))
5929                 return 0;
5930
5931         switch (adev->asic_type) {
5932         case CHIP_FIJI:
5933         case CHIP_CARRIZO:
5934         case CHIP_STONEY:
5935                 gfx_v8_0_update_gfx_clock_gating(adev,
5936                                                  state == AMD_CG_STATE_GATE);
5937                 break;
5938         case CHIP_TONGA:
5939                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5940                 break;
5941         case CHIP_POLARIS10:
5942         case CHIP_POLARIS11:
5943         case CHIP_POLARIS12:
5944         case CHIP_VEGAM:
5945                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5946                 break;
5947         default:
5948                 break;
5949         }
5950         return 0;
5951 }
5952
5953 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5954 {
5955         return ring->adev->wb.wb[ring->rptr_offs];
5956 }
5957
5958 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5959 {
5960         struct amdgpu_device *adev = ring->adev;
5961
5962         if (ring->use_doorbell)
5963                 /* XXX check if swapping is necessary on BE */
5964                 return ring->adev->wb.wb[ring->wptr_offs];
5965         else
5966                 return RREG32(mmCP_RB0_WPTR);
5967 }
5968
5969 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5970 {
5971         struct amdgpu_device *adev = ring->adev;
5972
5973         if (ring->use_doorbell) {
5974                 /* XXX check if swapping is necessary on BE */
5975                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
5976                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
5977         } else {
5978                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5979                 (void)RREG32(mmCP_RB0_WPTR);
5980         }
5981 }
5982
5983 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5984 {
5985         u32 ref_and_mask, reg_mem_engine;
5986
5987         if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
5988             (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
5989                 switch (ring->me) {
5990                 case 1:
5991                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5992                         break;
5993                 case 2:
5994                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5995                         break;
5996                 default:
5997                         return;
5998                 }
5999                 reg_mem_engine = 0;
6000         } else {
6001                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6002                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6003         }
6004
6005         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6006         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6007                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6008                                  reg_mem_engine));
6009         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6010         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6011         amdgpu_ring_write(ring, ref_and_mask);
6012         amdgpu_ring_write(ring, ref_and_mask);
6013         amdgpu_ring_write(ring, 0x20); /* poll interval */
6014 }
6015
6016 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6017 {
6018         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6019         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6020                 EVENT_INDEX(4));
6021
6022         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6023         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6024                 EVENT_INDEX(0));
6025 }
6026
6027 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6028                                         struct amdgpu_job *job,
6029                                         struct amdgpu_ib *ib,
6030                                         bool ctx_switch)
6031 {
6032         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6033         u32 header, control = 0;
6034
6035         if (ib->flags & AMDGPU_IB_FLAG_CE)
6036                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6037         else
6038                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6039
6040         control |= ib->length_dw | (vmid << 24);
6041
6042         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6043                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6044
6045                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6046                         gfx_v8_0_ring_emit_de_meta(ring);
6047         }
6048
6049         amdgpu_ring_write(ring, header);
6050         amdgpu_ring_write(ring,
6051 #ifdef __BIG_ENDIAN
6052                           (2 << 0) |
6053 #endif
6054                           (ib->gpu_addr & 0xFFFFFFFC));
6055         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6056         amdgpu_ring_write(ring, control);
6057 }
6058
6059 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6060                                           struct amdgpu_job *job,
6061                                           struct amdgpu_ib *ib,
6062                                           bool ctx_switch)
6063 {
6064         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6065         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6066
6067         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6068         amdgpu_ring_write(ring,
6069 #ifdef __BIG_ENDIAN
6070                                 (2 << 0) |
6071 #endif
6072                                 (ib->gpu_addr & 0xFFFFFFFC));
6073         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6074         amdgpu_ring_write(ring, control);
6075 }
6076
6077 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6078                                          u64 seq, unsigned flags)
6079 {
6080         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6081         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6082
6083         /* EVENT_WRITE_EOP - flush caches, send int */
6084         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6085         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6086                                  EOP_TC_ACTION_EN |
6087                                  EOP_TC_WB_ACTION_EN |
6088                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6089                                  EVENT_INDEX(5)));
6090         amdgpu_ring_write(ring, addr & 0xfffffffc);
6091         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6092                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6093         amdgpu_ring_write(ring, lower_32_bits(seq));
6094         amdgpu_ring_write(ring, upper_32_bits(seq));
6095
6096 }
6097
6098 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6099 {
6100         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6101         uint32_t seq = ring->fence_drv.sync_seq;
6102         uint64_t addr = ring->fence_drv.gpu_addr;
6103
6104         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6105         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6106                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6107                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6108         amdgpu_ring_write(ring, addr & 0xfffffffc);
6109         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6110         amdgpu_ring_write(ring, seq);
6111         amdgpu_ring_write(ring, 0xffffffff);
6112         amdgpu_ring_write(ring, 4); /* poll interval */
6113 }
6114
6115 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6116                                         unsigned vmid, uint64_t pd_addr)
6117 {
6118         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6119
6120         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6121
6122         /* wait for the invalidate to complete */
6123         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6124         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6125                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6126                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6127         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6128         amdgpu_ring_write(ring, 0);
6129         amdgpu_ring_write(ring, 0); /* ref */
6130         amdgpu_ring_write(ring, 0); /* mask */
6131         amdgpu_ring_write(ring, 0x20); /* poll interval */
6132
6133         /* compute doesn't have PFP */
6134         if (usepfp) {
6135                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6136                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6137                 amdgpu_ring_write(ring, 0x0);
6138         }
6139 }
6140
6141 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6142 {
6143         return ring->adev->wb.wb[ring->wptr_offs];
6144 }
6145
6146 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6147 {
6148         struct amdgpu_device *adev = ring->adev;
6149
6150         /* XXX check if swapping is necessary on BE */
6151         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6152         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6153 }
6154
6155 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6156                                            bool acquire)
6157 {
6158         struct amdgpu_device *adev = ring->adev;
6159         int pipe_num, tmp, reg;
6160         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6161
6162         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6163
6164         /* first me only has 2 entries, GFX and HP3D */
6165         if (ring->me > 0)
6166                 pipe_num -= 2;
6167
6168         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6169         tmp = RREG32(reg);
6170         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6171         WREG32(reg, tmp);
6172 }
6173
6174 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6175                                             struct amdgpu_ring *ring,
6176                                             bool acquire)
6177 {
6178         int i, pipe;
6179         bool reserve;
6180         struct amdgpu_ring *iring;
6181
6182         mutex_lock(&adev->gfx.pipe_reserve_mutex);
6183         pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6184         if (acquire)
6185                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6186         else
6187                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6188
6189         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6190                 /* Clear all reservations - everyone reacquires all resources */
6191                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6192                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6193                                                        true);
6194
6195                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6196                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6197                                                        true);
6198         } else {
6199                 /* Lower all pipes without a current reservation */
6200                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6201                         iring = &adev->gfx.gfx_ring[i];
6202                         pipe = amdgpu_gfx_queue_to_bit(adev,
6203                                                        iring->me,
6204                                                        iring->pipe,
6205                                                        0);
6206                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6207                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6208                 }
6209
6210                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6211                         iring = &adev->gfx.compute_ring[i];
6212                         pipe = amdgpu_gfx_queue_to_bit(adev,
6213                                                        iring->me,
6214                                                        iring->pipe,
6215                                                        0);
6216                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6217                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6218                 }
6219         }
6220
6221         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6222 }
6223
6224 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6225                                       struct amdgpu_ring *ring,
6226                                       bool acquire)
6227 {
6228         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6229         uint32_t queue_priority = acquire ? 0xf : 0x0;
6230
6231         mutex_lock(&adev->srbm_mutex);
6232         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6233
6234         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6235         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6236
6237         vi_srbm_select(adev, 0, 0, 0, 0);
6238         mutex_unlock(&adev->srbm_mutex);
6239 }
6240 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6241                                                enum drm_sched_priority priority)
6242 {
6243         struct amdgpu_device *adev = ring->adev;
6244         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6245
6246         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6247                 return;
6248
6249         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6250         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6251 }
6252
6253 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6254                                              u64 addr, u64 seq,
6255                                              unsigned flags)
6256 {
6257         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6258         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6259
6260         /* RELEASE_MEM - flush caches, send int */
6261         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6262         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6263                                  EOP_TC_ACTION_EN |
6264                                  EOP_TC_WB_ACTION_EN |
6265                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6266                                  EVENT_INDEX(5)));
6267         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6268         amdgpu_ring_write(ring, addr & 0xfffffffc);
6269         amdgpu_ring_write(ring, upper_32_bits(addr));
6270         amdgpu_ring_write(ring, lower_32_bits(seq));
6271         amdgpu_ring_write(ring, upper_32_bits(seq));
6272 }
6273
6274 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6275                                          u64 seq, unsigned int flags)
6276 {
6277         /* we only allocate 32bit for each seq wb address */
6278         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6279
6280         /* write fence seq to the "addr" */
6281         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6282         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6283                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6284         amdgpu_ring_write(ring, lower_32_bits(addr));
6285         amdgpu_ring_write(ring, upper_32_bits(addr));
6286         amdgpu_ring_write(ring, lower_32_bits(seq));
6287
6288         if (flags & AMDGPU_FENCE_FLAG_INT) {
6289                 /* set register to trigger INT */
6290                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6291                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6292                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6293                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6294                 amdgpu_ring_write(ring, 0);
6295                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6296         }
6297 }
6298
6299 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6300 {
6301         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6302         amdgpu_ring_write(ring, 0);
6303 }
6304
6305 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6306 {
6307         uint32_t dw2 = 0;
6308
6309         if (amdgpu_sriov_vf(ring->adev))
6310                 gfx_v8_0_ring_emit_ce_meta(ring);
6311
6312         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6313         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6314                 gfx_v8_0_ring_emit_vgt_flush(ring);
6315                 /* set load_global_config & load_global_uconfig */
6316                 dw2 |= 0x8001;
6317                 /* set load_cs_sh_regs */
6318                 dw2 |= 0x01000000;
6319                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6320                 dw2 |= 0x10002;
6321
6322                 /* set load_ce_ram if preamble presented */
6323                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6324                         dw2 |= 0x10000000;
6325         } else {
6326                 /* still load_ce_ram if this is the first time preamble presented
6327                  * although there is no context switch happens.
6328                  */
6329                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6330                         dw2 |= 0x10000000;
6331         }
6332
6333         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6334         amdgpu_ring_write(ring, dw2);
6335         amdgpu_ring_write(ring, 0);
6336 }
6337
6338 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6339 {
6340         unsigned ret;
6341
6342         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6343         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6344         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6345         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6346         ret = ring->wptr & ring->buf_mask;
6347         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6348         return ret;
6349 }
6350
6351 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6352 {
6353         unsigned cur;
6354
6355         BUG_ON(offset > ring->buf_mask);
6356         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6357
6358         cur = (ring->wptr & ring->buf_mask) - 1;
6359         if (likely(cur > offset))
6360                 ring->ring[offset] = cur - offset;
6361         else
6362                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6363 }
6364
6365 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6366 {
6367         struct amdgpu_device *adev = ring->adev;
6368
6369         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6370         amdgpu_ring_write(ring, 0 |     /* src: register*/
6371                                 (5 << 8) |      /* dst: memory */
6372                                 (1 << 20));     /* write confirm */
6373         amdgpu_ring_write(ring, reg);
6374         amdgpu_ring_write(ring, 0);
6375         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6376                                 adev->virt.reg_val_offs * 4));
6377         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6378                                 adev->virt.reg_val_offs * 4));
6379 }
6380
6381 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6382                                   uint32_t val)
6383 {
6384         uint32_t cmd;
6385
6386         switch (ring->funcs->type) {
6387         case AMDGPU_RING_TYPE_GFX:
6388                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6389                 break;
6390         case AMDGPU_RING_TYPE_KIQ:
6391                 cmd = 1 << 16; /* no inc addr */
6392                 break;
6393         default:
6394                 cmd = WR_CONFIRM;
6395                 break;
6396         }
6397
6398         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6399         amdgpu_ring_write(ring, cmd);
6400         amdgpu_ring_write(ring, reg);
6401         amdgpu_ring_write(ring, 0);
6402         amdgpu_ring_write(ring, val);
6403 }
6404
6405 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6406 {
6407         struct amdgpu_device *adev = ring->adev;
6408         uint32_t value = 0;
6409
6410         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6411         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6412         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6413         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6414         WREG32(mmSQ_CMD, value);
6415 }
6416
6417 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6418                                                  enum amdgpu_interrupt_state state)
6419 {
6420         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6421                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6422 }
6423
6424 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6425                                                      int me, int pipe,
6426                                                      enum amdgpu_interrupt_state state)
6427 {
6428         u32 mec_int_cntl, mec_int_cntl_reg;
6429
6430         /*
6431          * amdgpu controls only the first MEC. That's why this function only
6432          * handles the setting of interrupts for this specific MEC. All other
6433          * pipes' interrupts are set by amdkfd.
6434          */
6435
6436         if (me == 1) {
6437                 switch (pipe) {
6438                 case 0:
6439                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6440                         break;
6441                 case 1:
6442                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6443                         break;
6444                 case 2:
6445                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6446                         break;
6447                 case 3:
6448                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6449                         break;
6450                 default:
6451                         DRM_DEBUG("invalid pipe %d\n", pipe);
6452                         return;
6453                 }
6454         } else {
6455                 DRM_DEBUG("invalid me %d\n", me);
6456                 return;
6457         }
6458
6459         switch (state) {
6460         case AMDGPU_IRQ_STATE_DISABLE:
6461                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6462                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6463                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6464                 break;
6465         case AMDGPU_IRQ_STATE_ENABLE:
6466                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6467                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6468                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6469                 break;
6470         default:
6471                 break;
6472         }
6473 }
6474
6475 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6476                                              struct amdgpu_irq_src *source,
6477                                              unsigned type,
6478                                              enum amdgpu_interrupt_state state)
6479 {
6480         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6481                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6482
6483         return 0;
6484 }
6485
6486 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6487                                               struct amdgpu_irq_src *source,
6488                                               unsigned type,
6489                                               enum amdgpu_interrupt_state state)
6490 {
6491         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6492                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6493
6494         return 0;
6495 }
6496
6497 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6498                                             struct amdgpu_irq_src *src,
6499                                             unsigned type,
6500                                             enum amdgpu_interrupt_state state)
6501 {
6502         switch (type) {
6503         case AMDGPU_CP_IRQ_GFX_EOP:
6504                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6505                 break;
6506         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6507                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6508                 break;
6509         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6510                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6511                 break;
6512         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6513                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6514                 break;
6515         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6516                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6517                 break;
6518         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6519                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6520                 break;
6521         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6522                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6523                 break;
6524         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6525                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6526                 break;
6527         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6528                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6529                 break;
6530         default:
6531                 break;
6532         }
6533         return 0;
6534 }
6535
6536 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6537                                          struct amdgpu_irq_src *source,
6538                                          unsigned int type,
6539                                          enum amdgpu_interrupt_state state)
6540 {
6541         int enable_flag;
6542
6543         switch (state) {
6544         case AMDGPU_IRQ_STATE_DISABLE:
6545                 enable_flag = 0;
6546                 break;
6547
6548         case AMDGPU_IRQ_STATE_ENABLE:
6549                 enable_flag = 1;
6550                 break;
6551
6552         default:
6553                 return -EINVAL;
6554         }
6555
6556         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6557         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6558         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6559         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6560         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6561         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6562                      enable_flag);
6563         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6564                      enable_flag);
6565         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6566                      enable_flag);
6567         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6568                      enable_flag);
6569         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6570                      enable_flag);
6571         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6572                      enable_flag);
6573         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6574                      enable_flag);
6575         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6576                      enable_flag);
6577
6578         return 0;
6579 }
6580
6581 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6582                                      struct amdgpu_irq_src *source,
6583                                      unsigned int type,
6584                                      enum amdgpu_interrupt_state state)
6585 {
6586         int enable_flag;
6587
6588         switch (state) {
6589         case AMDGPU_IRQ_STATE_DISABLE:
6590                 enable_flag = 1;
6591                 break;
6592
6593         case AMDGPU_IRQ_STATE_ENABLE:
6594                 enable_flag = 0;
6595                 break;
6596
6597         default:
6598                 return -EINVAL;
6599         }
6600
6601         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6602                      enable_flag);
6603
6604         return 0;
6605 }
6606
6607 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6608                             struct amdgpu_irq_src *source,
6609                             struct amdgpu_iv_entry *entry)
6610 {
6611         int i;
6612         u8 me_id, pipe_id, queue_id;
6613         struct amdgpu_ring *ring;
6614
6615         DRM_DEBUG("IH: CP EOP\n");
6616         me_id = (entry->ring_id & 0x0c) >> 2;
6617         pipe_id = (entry->ring_id & 0x03) >> 0;
6618         queue_id = (entry->ring_id & 0x70) >> 4;
6619
6620         switch (me_id) {
6621         case 0:
6622                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6623                 break;
6624         case 1:
6625         case 2:
6626                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6627                         ring = &adev->gfx.compute_ring[i];
6628                         /* Per-queue interrupt is supported for MEC starting from VI.
6629                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6630                           */
6631                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6632                                 amdgpu_fence_process(ring);
6633                 }
6634                 break;
6635         }
6636         return 0;
6637 }
6638
6639 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6640                            struct amdgpu_iv_entry *entry)
6641 {
6642         u8 me_id, pipe_id, queue_id;
6643         struct amdgpu_ring *ring;
6644         int i;
6645
6646         me_id = (entry->ring_id & 0x0c) >> 2;
6647         pipe_id = (entry->ring_id & 0x03) >> 0;
6648         queue_id = (entry->ring_id & 0x70) >> 4;
6649
6650         switch (me_id) {
6651         case 0:
6652                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6653                 break;
6654         case 1:
6655         case 2:
6656                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6657                         ring = &adev->gfx.compute_ring[i];
6658                         if (ring->me == me_id && ring->pipe == pipe_id &&
6659                             ring->queue == queue_id)
6660                                 drm_sched_fault(&ring->sched);
6661                 }
6662                 break;
6663         }
6664 }
6665
/*
 * Privileged register fault handler: log the illegal access and hand the
 * entry to gfx_v8_0_fault(). @source is not used. Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");

        /* notify the scheduler of the affected ring */
        gfx_v8_0_fault(adev, entry);

        return 0;
}
6674
/*
 * Privileged instruction fault handler: log the illegal instruction and hand
 * the entry to gfx_v8_0_fault(). @source is not used. Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");

        /* notify the scheduler of the affected ring */
        gfx_v8_0_fault(adev, entry);

        return 0;
}
6683
/*
 * CP ECC error interrupt handler: only logs the event.
 * None of the parameters are used. Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
                                     struct amdgpu_irq_src *source,
                                     struct amdgpu_iv_entry *entry)
{
        /* terminate the message so it ends its own log line */
        DRM_ERROR("CP EDC/ECC error detected.\n");
        return 0;
}
6691
6692 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6693 {
6694         u32 enc, se_id, sh_id, cu_id;
6695         char type[20];
6696         int sq_edc_source = -1;
6697
6698         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6699         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6700
6701         switch (enc) {
6702                 case 0:
6703                         DRM_INFO("SQ general purpose intr detected:"
6704                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6705                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6706                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6707                                         "wlt %d, thread_trace %d.\n",
6708                                         se_id,
6709                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6710                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6711                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6712                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6713                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6714                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6715                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6716                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6717                                         );
6718                         break;
6719                 case 1:
6720                 case 2:
6721
6722                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6723                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6724
6725                         /*
6726                          * This function can be called either directly from ISR
6727                          * or from BH in which case we can access SQ_EDC_INFO
6728                          * instance
6729                          */
6730                         if (in_task()) {
6731                                 mutex_lock(&adev->grbm_idx_mutex);
6732                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6733
6734                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6735
6736                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6737                                 mutex_unlock(&adev->grbm_idx_mutex);
6738                         }
6739
6740                         if (enc == 1)
6741                                 sprintf(type, "instruction intr");
6742                         else
6743                                 sprintf(type, "EDC/ECC error");
6744
6745                         DRM_INFO(
6746                                 "SQ %s detected: "
6747                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6748                                         "trap %s, sq_ed_info.source %s.\n",
6749                                         type, se_id, sh_id, cu_id,
6750                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6751                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6752                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6753                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6754                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6755                                 );
6756                         break;
6757                 default:
6758                         DRM_ERROR("SQ invalid encoding type\n.");
6759         }
6760 }
6761
6762 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6763 {
6764
6765         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6766         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6767
6768         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6769 }
6770
6771 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6772                            struct amdgpu_irq_src *source,
6773                            struct amdgpu_iv_entry *entry)
6774 {
6775         unsigned ih_data = entry->src_data[0];
6776
6777         /*
6778          * Try to submit work so SQ_EDC_INFO can be accessed from
6779          * BH. If previous work submission hasn't finished yet
6780          * just print whatever info is possible directly from the ISR.
6781          */
6782         if (work_pending(&adev->gfx.sq_work.work)) {
6783                 gfx_v8_0_parse_sq_irq(adev, ih_data);
6784         } else {
6785                 adev->gfx.sq_work.ih_data = ih_data;
6786                 schedule_work(&adev->gfx.sq_work.work);
6787         }
6788
6789         return 0;
6790 }
6791
6792 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6793         .name = "gfx_v8_0",
6794         .early_init = gfx_v8_0_early_init,
6795         .late_init = gfx_v8_0_late_init,
6796         .sw_init = gfx_v8_0_sw_init,
6797         .sw_fini = gfx_v8_0_sw_fini,
6798         .hw_init = gfx_v8_0_hw_init,
6799         .hw_fini = gfx_v8_0_hw_fini,
6800         .suspend = gfx_v8_0_suspend,
6801         .resume = gfx_v8_0_resume,
6802         .is_idle = gfx_v8_0_is_idle,
6803         .wait_for_idle = gfx_v8_0_wait_for_idle,
6804         .check_soft_reset = gfx_v8_0_check_soft_reset,
6805         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6806         .soft_reset = gfx_v8_0_soft_reset,
6807         .post_soft_reset = gfx_v8_0_post_soft_reset,
6808         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6809         .set_powergating_state = gfx_v8_0_set_powergating_state,
6810         .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6811 };
6812
6813 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6814         .type = AMDGPU_RING_TYPE_GFX,
6815         .align_mask = 0xff,
6816         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6817         .support_64bit_ptrs = false,
6818         .get_rptr = gfx_v8_0_ring_get_rptr,
6819         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6820         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6821         .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6822                 5 +  /* COND_EXEC */
6823                 7 +  /* PIPELINE_SYNC */
6824                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6825                 8 +  /* FENCE for VM_FLUSH */
6826                 20 + /* GDS switch */
6827                 4 + /* double SWITCH_BUFFER,
6828                        the first COND_EXEC jump to the place just
6829                            prior to this double SWITCH_BUFFER  */
6830                 5 + /* COND_EXEC */
6831                 7 +      /*     HDP_flush */
6832                 4 +      /*     VGT_flush */
6833                 14 + /* CE_META */
6834                 31 + /* DE_META */
6835                 3 + /* CNTX_CTRL */
6836                 5 + /* HDP_INVL */
6837                 8 + 8 + /* FENCE x2 */
6838                 2, /* SWITCH_BUFFER */
6839         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6840         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6841         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6842         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6843         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6844         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6845         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6846         .test_ring = gfx_v8_0_ring_test_ring,
6847         .test_ib = gfx_v8_0_ring_test_ib,
6848         .insert_nop = amdgpu_ring_insert_nop,
6849         .pad_ib = amdgpu_ring_generic_pad_ib,
6850         .emit_switch_buffer = gfx_v8_ring_emit_sb,
6851         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6852         .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6853         .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6854         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6855         .soft_recovery = gfx_v8_0_ring_soft_recovery,
6856 };
6857
6858 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6859         .type = AMDGPU_RING_TYPE_COMPUTE,
6860         .align_mask = 0xff,
6861         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6862         .support_64bit_ptrs = false,
6863         .get_rptr = gfx_v8_0_ring_get_rptr,
6864         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6865         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6866         .emit_frame_size =
6867                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6868                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6869                 5 + /* hdp_invalidate */
6870                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6871                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6872                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6873         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6874         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6875         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6876         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6877         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6878         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6879         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6880         .test_ring = gfx_v8_0_ring_test_ring,
6881         .test_ib = gfx_v8_0_ring_test_ib,
6882         .insert_nop = amdgpu_ring_insert_nop,
6883         .pad_ib = amdgpu_ring_generic_pad_ib,
6884         .set_priority = gfx_v8_0_ring_set_priority_compute,
6885         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6886 };
6887
6888 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6889         .type = AMDGPU_RING_TYPE_KIQ,
6890         .align_mask = 0xff,
6891         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6892         .support_64bit_ptrs = false,
6893         .get_rptr = gfx_v8_0_ring_get_rptr,
6894         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6895         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6896         .emit_frame_size =
6897                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6898                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6899                 5 + /* hdp_invalidate */
6900                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6901                 17 + /* gfx_v8_0_ring_emit_vm_flush */
6902                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6903         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6904         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6905         .test_ring = gfx_v8_0_ring_test_ring,
6906         .insert_nop = amdgpu_ring_insert_nop,
6907         .pad_ib = amdgpu_ring_generic_pad_ib,
6908         .emit_rreg = gfx_v8_0_ring_emit_rreg,
6909         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6910 };
6911
6912 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6913 {
6914         int i;
6915
6916         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6917
6918         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6919                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6920
6921         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6922                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6923 }
6924
6925 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6926         .set = gfx_v8_0_set_eop_interrupt_state,
6927         .process = gfx_v8_0_eop_irq,
6928 };
6929
6930 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6931         .set = gfx_v8_0_set_priv_reg_fault_state,
6932         .process = gfx_v8_0_priv_reg_irq,
6933 };
6934
6935 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6936         .set = gfx_v8_0_set_priv_inst_fault_state,
6937         .process = gfx_v8_0_priv_inst_irq,
6938 };
6939
6940 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
6941         .set = gfx_v8_0_set_cp_ecc_int_state,
6942         .process = gfx_v8_0_cp_ecc_error_irq,
6943 };
6944
6945 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
6946         .set = gfx_v8_0_set_sq_int_state,
6947         .process = gfx_v8_0_sq_irq,
6948 };
6949
6950 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6951 {
6952         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6953         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6954
6955         adev->gfx.priv_reg_irq.num_types = 1;
6956         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6957
6958         adev->gfx.priv_inst_irq.num_types = 1;
6959         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6960
6961         adev->gfx.cp_ecc_error_irq.num_types = 1;
6962         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
6963
6964         adev->gfx.sq_irq.num_types = 1;
6965         adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
6966 }
6967
6968 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6969 {
6970         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6971 }
6972
6973 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6974 {
6975         /* init asci gds info */
6976         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6977         adev->gds.gws.total_size = 64;
6978         adev->gds.oa.total_size = 16;
6979
6980         if (adev->gds.mem.total_size == 64 * 1024) {
6981                 adev->gds.mem.gfx_partition_size = 4096;
6982                 adev->gds.mem.cs_partition_size = 4096;
6983
6984                 adev->gds.gws.gfx_partition_size = 4;
6985                 adev->gds.gws.cs_partition_size = 4;
6986
6987                 adev->gds.oa.gfx_partition_size = 4;
6988                 adev->gds.oa.cs_partition_size = 1;
6989         } else {
6990                 adev->gds.mem.gfx_partition_size = 1024;
6991                 adev->gds.mem.cs_partition_size = 1024;
6992
6993                 adev->gds.gws.gfx_partition_size = 16;
6994                 adev->gds.gws.cs_partition_size = 16;
6995
6996                 adev->gds.oa.gfx_partition_size = 4;
6997                 adev->gds.oa.cs_partition_size = 4;
6998         }
6999 }
7000
7001 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7002                                                  u32 bitmap)
7003 {
7004         u32 data;
7005
7006         if (!bitmap)
7007                 return;
7008
7009         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7010         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7011
7012         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7013 }
7014
7015 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7016 {
7017         u32 data, mask;
7018
7019         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7020                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7021
7022         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7023
7024         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7025 }
7026
7027 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7028 {
7029         int i, j, k, counter, active_cu_number = 0;
7030         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7031         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7032         unsigned disable_masks[4 * 2];
7033         u32 ao_cu_num;
7034
7035         memset(cu_info, 0, sizeof(*cu_info));
7036
7037         if (adev->flags & AMD_IS_APU)
7038                 ao_cu_num = 2;
7039         else
7040                 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7041
7042         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7043
7044         mutex_lock(&adev->grbm_idx_mutex);
7045         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7046                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7047                         mask = 1;
7048                         ao_bitmap = 0;
7049                         counter = 0;
7050                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7051                         if (i < 4 && j < 2)
7052                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7053                                         adev, disable_masks[i * 2 + j]);
7054                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7055                         cu_info->bitmap[i][j] = bitmap;
7056
7057                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7058                                 if (bitmap & mask) {
7059                                         if (counter < ao_cu_num)
7060                                                 ao_bitmap |= mask;
7061                                         counter ++;
7062                                 }
7063                                 mask <<= 1;
7064                         }
7065                         active_cu_number += counter;
7066                         if (i < 2 && j < 2)
7067                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7068                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7069                 }
7070         }
7071         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7072         mutex_unlock(&adev->grbm_idx_mutex);
7073
7074         cu_info->number = active_cu_number;
7075         cu_info->ao_cu_mask = ao_cu_mask;
7076         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7077         cu_info->max_waves_per_simd = 10;
7078         cu_info->max_scratch_slots_per_cu = 32;
7079         cu_info->wave_front_size = 64;
7080         cu_info->lds_size = 64;
7081 }
7082
7083 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7084 {
7085         .type = AMD_IP_BLOCK_TYPE_GFX,
7086         .major = 8,
7087         .minor = 0,
7088         .rev = 0,
7089         .funcs = &gfx_v8_0_ip_funcs,
7090 };
7091
7092 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7093 {
7094         .type = AMD_IP_BLOCK_TYPE_GFX,
7095         .major = 8,
7096         .minor = 1,
7097         .rev = 0,
7098         .funcs = &gfx_v8_0_ip_funcs,
7099 };
7100
7101 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7102 {
7103         uint64_t ce_payload_addr;
7104         int cnt_ce;
7105         union {
7106                 struct vi_ce_ib_state regular;
7107                 struct vi_ce_ib_state_chained_ib chained;
7108         } ce_payload = {};
7109
7110         if (ring->adev->virt.chained_ib_support) {
7111                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7112                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7113                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7114         } else {
7115                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7116                         offsetof(struct vi_gfx_meta_data, ce_payload);
7117                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7118         }
7119
7120         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7121         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7122                                 WRITE_DATA_DST_SEL(8) |
7123                                 WR_CONFIRM) |
7124                                 WRITE_DATA_CACHE_POLICY(0));
7125         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7126         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7127         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7128 }
7129
7130 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7131 {
7132         uint64_t de_payload_addr, gds_addr, csa_addr;
7133         int cnt_de;
7134         union {
7135                 struct vi_de_ib_state regular;
7136                 struct vi_de_ib_state_chained_ib chained;
7137         } de_payload = {};
7138
7139         csa_addr = amdgpu_csa_vaddr(ring->adev);
7140         gds_addr = csa_addr + 4096;
7141         if (ring->adev->virt.chained_ib_support) {
7142                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7143                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7144                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7145                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7146         } else {
7147                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7148                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7149                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7150                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7151         }
7152
7153         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7154         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7155                                 WRITE_DATA_DST_SEL(8) |
7156                                 WR_CONFIRM) |
7157                                 WRITE_DATA_CACHE_POLICY(0));
7158         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7159         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7160         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7161 }
This page took 0.457668 seconds and 4 git commands to generate.