/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "vi.h"
33 #include "vi_structs.h"
34 #include "vid.h"
35 #include "amdgpu_ucode.h"
36 #include "amdgpu_atombios.h"
37 #include "atombios_i2c.h"
38 #include "clearstate_vi.h"
39
40 #include "gmc/gmc_8_2_d.h"
41 #include "gmc/gmc_8_2_sh_mask.h"
42
43 #include "oss/oss_3_0_d.h"
44 #include "oss/oss_3_0_sh_mask.h"
45
46 #include "bif/bif_5_0_d.h"
47 #include "bif/bif_5_0_sh_mask.h"
48 #include "gca/gfx_8_0_d.h"
49 #include "gca/gfx_8_0_enum.h"
50 #include "gca/gfx_8_0_sh_mask.h"
51
52 #include "dce/dce_10_0_d.h"
53 #include "dce/dce_10_0_sh_mask.h"
54
55 #include "smu/smu_7_1_3_d.h"
56
57 #include "ivsrcid/ivsrcid_vislands30.h"
58
/* GFX v8 uses a single GFX ring; each MEC pipe gets a 4 KiB HPD buffer. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 4096

/* Golden (validated) GB_ADDR_CONFIG values per ASIC family. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/*
 * Helpers to place a field value at the proper bit position of the
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 register layouts (the same field
 * layout is shared by all tile-mode index registers).
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-block override bits in RLC_CGTT_MGCG_OVERRIDE (clock-gating override). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES command codes: set (1) or clear (0) a BPM register. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0
87
/* BPM register addresses (targets of the SERDES set/clear commands above). */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX            /* number of BPM registers */
};

/* NOTE(review): presumably the entry count of the RLC direct-register
 * list format — confirm against the RLC firmware interface. */
#define RLC_FormatDirectRegListLength        14
99
/*
 * Firmware images requested for each supported VI-family ASIC:
 * CE/PFP/ME (graphics micro-engines), MEC/MEC2 (compute), RLC.
 * Polaris parts additionally list "_2" variants of most images.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
175
/*
 * Per-VMID GDS register offsets, indexed by VMID 0..15:
 * { GDS base, GDS size, GWS, OA } for each virtual-memory ID.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
195
/*
 * Tonga A11 golden register settings: flat triplets of
 * { register offset, AND mask, value } — consumed by the golden-register
 * programming helper (caller is outside this chunk).
 */
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
215
/*
 * Tonga common golden settings ({reg, mask, value} triplets):
 * raster config and SPI CU resource reservation defaults.
 */
static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
227
/*
 * Tonga MGCG/CGCG clock-gating init sequence ({reg, mask, value} triplets):
 * raise all RLC overrides, program per-block CGTT clock controls, then
 * the per-CU CGTS delay registers for CU0..CU7.
 * NOTE(review): GRBM_GFX_INDEX = 0xe0000000 appears to select broadcast
 * to all SEs/SHs/instances before the block-wide writes — confirm against
 * the GRBM_GFX_INDEX field encoding.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        /* per-CU CGTS controls, CU0..CU7 */
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
306
/* VegaM A11 golden register settings ({reg, AND mask, value} triplets). */
static const u32 golden_settings_vegam_a11[] =
{
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
327
/* VegaM common golden settings: address config and SPI CU reservations. */
static const u32 vegam_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
337
/* Polaris11 A11 golden register settings ({reg, AND mask, value} triplets). */
static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
358
/* Polaris11 common golden settings: address config and SPI CU reservations. */
static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
368
/* Polaris10 A11 golden register settings ({reg, AND mask, value} triplets). */
static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
389
/* Polaris10 common golden settings: raster/address config and SPI CU reservations. */
static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
401
/* Fiji common golden settings: raster/address config and SPI CU reservations. */
static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
415
/* Fiji A10 golden register settings ({reg, AND mask, value} triplets). */
static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
430
/*
 * Fiji MGCG/CGCG clock-gating init sequence ({reg, mask, value} triplets).
 * Same structure as the Tonga sequence but without per-CU CGTS entries.
 * NOTE(review): GRBM_GFX_INDEX = 0xe0000000 appears to select broadcast
 * mode before the block-wide writes — confirm against the field encoding.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
469
/* Iceland (Topaz) A11 golden register settings ({reg, AND mask, value} triplets). */
static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
489
/* Iceland common golden settings: raster/address config and SPI CU reservations. */
static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
501
/*
 * Iceland MGCG/CGCG clock-gating init sequence ({reg, mask, value} triplets):
 * RLC overrides, per-block CGTT clock controls, then per-CU CGTS entries
 * for CU0..CU5 (Iceland lists fewer CUs than Tonga/Carrizo).
 * NOTE(review): GRBM_GFX_INDEX = 0xe0000000 appears to select broadcast
 * mode before the block-wide writes — confirm against the field encoding.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        /* per-CU CGTS controls, CU0..CU5 */
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
569
/* Carrizo A11 golden register settings ({reg, AND mask, value} triplets). */
static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
585
/* Carrizo common golden settings: raster/address config and SPI CU reservations. */
static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
597
598 static const u32 cz_mgcg_cgcg_init[] =
599 {
600         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
601         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
602         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
606         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
609         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
611         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
615         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
616         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
617         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
619         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
620         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
621         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
622         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
623         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
624         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
625         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
626         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
627         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
628         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
630         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
631         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
632         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
633         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
634         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
635         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
636         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
637         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
638         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
639         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
640         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
641         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
642         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
643         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
644         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
645         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
646         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
647         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
648         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
649         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
650         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
651         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
652         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
653         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
654         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
655         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
656         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
657         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
658         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
659         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
660         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
661         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
662         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
663         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
664         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
665         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
666         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
667         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
668         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
669         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
670         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
671         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
672         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
673         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
674         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
675 };
676
/*
 * Stoney golden register settings: (register, mask, value) triplets
 * consumed by amdgpu_device_program_register_sequence() in
 * gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
690
/*
 * Stoney common golden registers (raster config, address config and SPI
 * resource reservation), programmed as (register, mask, value) triplets.
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
702
/*
 * Stoney medium-grain / coarse-grain clockgating init sequence,
 * (register, mask, value) triplets applied before the other golden lists.
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
711
712
/*
 * Human-readable descriptions of SQ EDC (error detection and correction)
 * error sources, used when reporting SQ EDC interrupts.
 * NOTE(review): presumably indexed by the SQ_EDC_INFO SOURCE field value —
 * confirm against the interrupt handler that prints these.
 */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
722
723 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
724 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
725 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
726 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
727 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
728 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
729 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
730 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
731
732 #define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
733 #define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L
734
/*
 * gfx_v8_0_init_golden_registers - apply per-ASIC golden register settings.
 * @adev: amdgpu device
 *
 * Programs the clockgating-init, golden-settings and common register
 * sequences for the detected VI-family ASIC via
 * amdgpu_device_program_register_sequence().  Polaris10 additionally sets
 * the ACLK divider and applies a board-specific I2C quirk.  Unknown ASICs
 * are left untouched.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	uint32_t data;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		/* read-modify-write the SMC ACLK divider to 0x18 */
		data = RREG32_SMC(ixCG_ACLK_CNTL);
		data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
		data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
		WREG32_SMC(ixCG_ACLK_CNTL, data);
		/*
		 * Quirk for three specific Polaris10 (0x67DF rev 0xc7) board
		 * SKUs, matched by subsystem vendor/device: issue two I2C
		 * register writes via the atombios I2C channel.
		 * NOTE(review): presumably a board-level workaround (e.g.
		 * voltage/fan controller setup) — confirm with the board
		 * vendors' errata before touching.
		 */
		if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
836
/*
 * gfx_v8_0_scratch_init - set up the GFX scratch register allocator.
 * @adev: amdgpu device
 *
 * Makes eight scratch registers starting at mmSCRATCH_REG0 available;
 * free_mask starts with one bit set per register (all free).
 */
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
843
844 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
845 {
846         struct amdgpu_device *adev = ring->adev;
847         uint32_t scratch;
848         uint32_t tmp = 0;
849         unsigned i;
850         int r;
851
852         r = amdgpu_gfx_scratch_get(adev, &scratch);
853         if (r)
854                 return r;
855
856         WREG32(scratch, 0xCAFEDEAD);
857         r = amdgpu_ring_alloc(ring, 3);
858         if (r)
859                 goto error_free_scratch;
860
861         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
862         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
863         amdgpu_ring_write(ring, 0xDEADBEEF);
864         amdgpu_ring_commit(ring);
865
866         for (i = 0; i < adev->usec_timeout; i++) {
867                 tmp = RREG32(scratch);
868                 if (tmp == 0xDEADBEEF)
869                         break;
870                 udelay(1);
871         }
872
873         if (i >= adev->usec_timeout)
874                 r = -ETIMEDOUT;
875
876 error_free_scratch:
877         amdgpu_gfx_scratch_free(adev, scratch);
878         return r;
879 }
880
/*
 * gfx_v8_0_ring_test_ib - test indirect buffer submission on @ring.
 * @ring: ring to exercise
 * @timeout: fence wait timeout, in jiffies
 *
 * Allocates a writeback slot, submits a small IB whose WRITE_DATA packet
 * stores a magic value into it, waits for the fence, and verifies the
 * value landed.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signalled, -EINVAL
 * if the write did not land, or a negative error code from allocation or
 * submission.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	/* Seed the slot so we can detect whether the IB actually ran. */
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16,
					AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	/* WRITE_DATA to memory (dst_sel 5), with write confirmation. */
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	/* dma_fence_wait_timeout() returns 0 on timeout, <0 on error. */
	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
936
937
/*
 * gfx_v8_0_free_microcode - release all GFX firmware references.
 * @adev: amdgpu device
 *
 * Mirrors gfx_v8_0_init_microcode(): every image requested there is
 * released here.  Stoney and Topaz never load a MEC2 image, hence the
 * guard (their mec2_fw stays NULL, and release_firmware(NULL) would be a
 * no-op anyway).  Also frees the RLC register-list buffer allocated while
 * parsing the RLC header.
 */
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
957
958 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
959 {
960         const char *chip_name;
961         char fw_name[30];
962         int err;
963         struct amdgpu_firmware_info *info = NULL;
964         const struct common_firmware_header *header = NULL;
965         const struct gfx_firmware_header_v1_0 *cp_hdr;
966         const struct rlc_firmware_header_v2_0 *rlc_hdr;
967         unsigned int *tmp = NULL, i;
968
969         DRM_DEBUG("\n");
970
971         switch (adev->asic_type) {
972         case CHIP_TOPAZ:
973                 chip_name = "topaz";
974                 break;
975         case CHIP_TONGA:
976                 chip_name = "tonga";
977                 break;
978         case CHIP_CARRIZO:
979                 chip_name = "carrizo";
980                 break;
981         case CHIP_FIJI:
982                 chip_name = "fiji";
983                 break;
984         case CHIP_STONEY:
985                 chip_name = "stoney";
986                 break;
987         case CHIP_POLARIS10:
988                 chip_name = "polaris10";
989                 break;
990         case CHIP_POLARIS11:
991                 chip_name = "polaris11";
992                 break;
993         case CHIP_POLARIS12:
994                 chip_name = "polaris12";
995                 break;
996         case CHIP_VEGAM:
997                 chip_name = "vegam";
998                 break;
999         default:
1000                 BUG();
1001         }
1002
1003         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1004                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
1005                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1006                 if (err == -ENOENT) {
1007                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1008                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1009                 }
1010         } else {
1011                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1012                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1013         }
1014         if (err)
1015                 goto out;
1016         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1017         if (err)
1018                 goto out;
1019         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1020         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1021         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1022
1023         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1024                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1025                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1026                 if (err == -ENOENT) {
1027                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1028                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1029                 }
1030         } else {
1031                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1032                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1033         }
1034         if (err)
1035                 goto out;
1036         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1037         if (err)
1038                 goto out;
1039         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1040         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1041
1042         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1043
1044         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1045                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1046                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1047                 if (err == -ENOENT) {
1048                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1049                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1050                 }
1051         } else {
1052                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1053                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1054         }
1055         if (err)
1056                 goto out;
1057         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1058         if (err)
1059                 goto out;
1060         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1061         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1062         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1063
1064         /*
1065          * Support for MCBP/Virtualization in combination with chained IBs is
1066          * formal released on feature version #46
1067          */
1068         if (adev->gfx.ce_feature_version >= 46 &&
1069             adev->gfx.pfp_feature_version >= 46) {
1070                 adev->virt.chained_ib_support = true;
1071                 DRM_INFO("Chained IB support enabled!\n");
1072         } else
1073                 adev->virt.chained_ib_support = false;
1074
1075         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1076         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1077         if (err)
1078                 goto out;
1079         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1080         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1081         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1082         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1083
1084         adev->gfx.rlc.save_and_restore_offset =
1085                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1086         adev->gfx.rlc.clear_state_descriptor_offset =
1087                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1088         adev->gfx.rlc.avail_scratch_ram_locations =
1089                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1090         adev->gfx.rlc.reg_restore_list_size =
1091                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1092         adev->gfx.rlc.reg_list_format_start =
1093                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1094         adev->gfx.rlc.reg_list_format_separate_start =
1095                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1096         adev->gfx.rlc.starting_offsets_start =
1097                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1098         adev->gfx.rlc.reg_list_format_size_bytes =
1099                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1100         adev->gfx.rlc.reg_list_size_bytes =
1101                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1102
1103         adev->gfx.rlc.register_list_format =
1104                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1105                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1106
1107         if (!adev->gfx.rlc.register_list_format) {
1108                 err = -ENOMEM;
1109                 goto out;
1110         }
1111
1112         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1113                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1114         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1115                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1116
1117         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1118
1119         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1120                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1121         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1122                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1123
1124         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1125                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1126                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1127                 if (err == -ENOENT) {
1128                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1129                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1130                 }
1131         } else {
1132                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1133                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1134         }
1135         if (err)
1136                 goto out;
1137         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1138         if (err)
1139                 goto out;
1140         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1141         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1142         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1143
1144         if ((adev->asic_type != CHIP_STONEY) &&
1145             (adev->asic_type != CHIP_TOPAZ)) {
1146                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1147                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1148                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1149                         if (err == -ENOENT) {
1150                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1151                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1152                         }
1153                 } else {
1154                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1155                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1156                 }
1157                 if (!err) {
1158                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1159                         if (err)
1160                                 goto out;
1161                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1162                                 adev->gfx.mec2_fw->data;
1163                         adev->gfx.mec2_fw_version =
1164                                 le32_to_cpu(cp_hdr->header.ucode_version);
1165                         adev->gfx.mec2_feature_version =
1166                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1167                 } else {
1168                         err = 0;
1169                         adev->gfx.mec2_fw = NULL;
1170                 }
1171         }
1172
1173         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1174         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1175         info->fw = adev->gfx.pfp_fw;
1176         header = (const struct common_firmware_header *)info->fw->data;
1177         adev->firmware.fw_size +=
1178                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1179
1180         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1181         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1182         info->fw = adev->gfx.me_fw;
1183         header = (const struct common_firmware_header *)info->fw->data;
1184         adev->firmware.fw_size +=
1185                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1186
1187         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1188         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1189         info->fw = adev->gfx.ce_fw;
1190         header = (const struct common_firmware_header *)info->fw->data;
1191         adev->firmware.fw_size +=
1192                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1193
1194         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1195         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1196         info->fw = adev->gfx.rlc_fw;
1197         header = (const struct common_firmware_header *)info->fw->data;
1198         adev->firmware.fw_size +=
1199                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1200
1201         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1202         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1203         info->fw = adev->gfx.mec_fw;
1204         header = (const struct common_firmware_header *)info->fw->data;
1205         adev->firmware.fw_size +=
1206                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1207
1208         /* we need account JT in */
1209         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1210         adev->firmware.fw_size +=
1211                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1212
1213         if (amdgpu_sriov_vf(adev)) {
1214                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1215                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1216                 info->fw = adev->gfx.mec_fw;
1217                 adev->firmware.fw_size +=
1218                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1219         }
1220
1221         if (adev->gfx.mec2_fw) {
1222                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1223                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1224                 info->fw = adev->gfx.mec2_fw;
1225                 header = (const struct common_firmware_header *)info->fw->data;
1226                 adev->firmware.fw_size +=
1227                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1228         }
1229
1230 out:
1231         if (err) {
1232                 dev_err(adev->dev,
1233                         "gfx8: Failed to load firmware \"%s\"\n",
1234                         fw_name);
1235                 release_firmware(adev->gfx.pfp_fw);
1236                 adev->gfx.pfp_fw = NULL;
1237                 release_firmware(adev->gfx.me_fw);
1238                 adev->gfx.me_fw = NULL;
1239                 release_firmware(adev->gfx.ce_fw);
1240                 adev->gfx.ce_fw = NULL;
1241                 release_firmware(adev->gfx.rlc_fw);
1242                 adev->gfx.rlc_fw = NULL;
1243                 release_firmware(adev->gfx.mec_fw);
1244                 adev->gfx.mec_fw = NULL;
1245                 release_firmware(adev->gfx.mec2_fw);
1246                 adev->gfx.mec2_fw = NULL;
1247         }
1248         return err;
1249 }
1250
/*
 * gfx_v8_0_get_csb_buffer - build the clear-state buffer PM4 stream.
 * @adev: amdgpu device
 * @buffer: destination buffer for the little-endian PM4 dwords
 *
 * Emits PREAMBLE begin, CONTEXT_CONTROL, every SECT_CONTEXT register
 * extent from adev->gfx.rlc.cs_data, the raster config pair, PREAMBLE
 * end and a CLEAR_STATE packet.  Packet order is consumed by the CP and
 * must not be rearranged.  Silently does nothing if cs_data or buffer
 * is NULL.  NOTE(review): the caller is presumably responsible for
 * sizing @buffer via gfx_v8_0_get_csb_size() — confirm.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* copy every context-register extent as a SET_CONTEXT_REG packet */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT sections are expected here */
				return;
			}
		}
	}

	/* raster config pair from the first RB configuration */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1297
1298 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1299 {
1300         if (adev->asic_type == CHIP_CARRIZO)
1301                 return 5;
1302         else
1303                 return 4;
1304 }
1305
1306 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1307 {
1308         const struct cs_section_def *cs_data;
1309         int r;
1310
1311         adev->gfx.rlc.cs_data = vi_cs_data;
1312
1313         cs_data = adev->gfx.rlc.cs_data;
1314
1315         if (cs_data) {
1316                 /* init clear state block */
1317                 r = amdgpu_gfx_rlc_init_csb(adev);
1318                 if (r)
1319                         return r;
1320         }
1321
1322         if ((adev->asic_type == CHIP_CARRIZO) ||
1323             (adev->asic_type == CHIP_STONEY)) {
1324                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1325                 r = amdgpu_gfx_rlc_init_cpt(adev);
1326                 if (r)
1327                         return r;
1328         }
1329
1330         /* init spm vmid with 0xf */
1331         if (adev->gfx.rlc.funcs->update_spm_vmid)
1332                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1333
1334         return 0;
1335 }
1336
/* Free the MEC HPD/EOP buffer object allocated by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1341
1342 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1343 {
1344         int r;
1345         u32 *hpd;
1346         size_t mec_hpd_size;
1347
1348         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1349
1350         /* take ownership of the relevant compute queues */
1351         amdgpu_gfx_compute_queue_acquire(adev);
1352
1353         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1354         if (mec_hpd_size) {
1355                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1356                                               AMDGPU_GEM_DOMAIN_VRAM,
1357                                               &adev->gfx.mec.hpd_eop_obj,
1358                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1359                                               (void **)&hpd);
1360                 if (r) {
1361                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1362                         return r;
1363                 }
1364
1365                 memset(hpd, 0, mec_hpd_size);
1366
1367                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1368                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1369         }
1370
1371         return 0;
1372 }
1373
/*
 * GCN3 machine code for the compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize the VGPR file
 * (loaded into the IB at vgpr_offset and dispatched with vgpr_init_regs).
 * NOTE(review): raw ISA, presumably a run of v_mov-style writes ending
 * in s_endpgm — verify against the GCN3 ISA manual before editing.
 */
static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000,
};
1410
/*
 * GCN3 machine code for the compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize the SGPR file
 * (loaded into the IB at sgpr_offset; dispatched twice with the
 * complementary sgpr1_init_regs / sgpr2_init_regs CU masks).
 */
static const u32 sgpr_init_compute_shader[] =
{
        0xbe8a0100, 0xbe8c0102,
        0xbe8e0104, 0xbe900106,
        0xbe920108, 0xbe940100,
        0xbe960102, 0xbe980104,
        0xbe9a0106, 0xbe9c0108,
        0xbe9e0100, 0xbea00102,
        0xbea20104, 0xbea40106,
        0xbea60108, 0xbea80100,
        0xbeaa0102, 0xbeac0104,
        0xbeae0106, 0xbeb00108,
        0xbeb20100, 0xbeb40102,
        0xbeb60104, 0xbeb80106,
        0xbeba0108, 0xbebc0100,
        0xbebe0102, 0xbec00104,
        0xbec20106, 0xbec40108,
        0xbec60100, 0xbec80102,
        0xbee60004, 0xbee70005,
        0xbeea0006, 0xbeeb0007,
        0xbee80008, 0xbee90009,
        0xbefc0000, 0xbf8a0000,
        0xbf810000, 0x00000000,
};
1435
/*
 * Register/value pairs programmed via SET_SH_REG before the VGPR-init
 * dispatch in gfx_v8_0_do_edc_gpr_workarounds(): all SE0 CUs enabled,
 * 1024 threads in X, and resource limits sized for the VGPR shader.
 */
static const u32 vgpr_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1456
/*
 * Register/value pairs for the first SGPR-init dispatch: limits the
 * shader to the low CU mask (SE0 0x0f); counterpart of sgpr2_init_regs.
 */
static const u32 sgpr1_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1477
/*
 * Register/value pairs for the second SGPR-init dispatch: the
 * complementary high CU mask (SE0 0xf0) to sgpr1_init_regs, so both
 * dispatches together cover all CUs.
 */
static const u32 sgpr2_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1498
/*
 * EDC SEC/DED error-counter registers; read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
        mmCPC_EDC_ATC_CNT,
        mmCPC_EDC_SCRATCH_CNT,
        mmCPC_EDC_UCODE_CNT,
        mmCPF_EDC_ATC_CNT,
        mmCPF_EDC_ROQ_CNT,
        mmCPF_EDC_TAG_CNT,
        mmCPG_EDC_ATC_CNT,
        mmCPG_EDC_DMA_CNT,
        mmCPG_EDC_TAG_CNT,
        mmDC_EDC_CSINVOC_CNT,
        mmDC_EDC_RESTORE_CNT,
        mmDC_EDC_STATE_CNT,
        mmGDS_EDC_CNT,
        mmGDS_EDC_GRBM_CNT,
        mmGDS_EDC_OA_DED,
        mmSPI_EDC_CNT,
        mmSQC_ATC_EDC_GATCL1_CNT,
        mmSQC_EDC_CNT,
        mmSQ_EDC_DED_CNT,
        mmSQ_EDC_INFO,
        mmSQ_EDC_SEC_CNT,
        mmTCC_EDC_CNT,
        mmTCP_ATC_EDC_GATCL1_CNT,
        mmTCP_EDC_CNT,
        mmTD_EDC_CNT
};
1527
/*
 * gfx_v8_0_do_edc_gpr_workarounds - prime GPR EDC/ECC state (Carrizo only).
 *
 * Builds one indirect buffer containing three compute dispatches: one
 * running the VGPR-init shader and two running the SGPR-init shader with
 * complementary CU masks (sgpr1_init_regs / sgpr2_init_regs).  The IB is
 * submitted on compute ring 0 and waited on synchronously; afterwards
 * DED_MODE/PROP_FED are enabled in GB_EDC_MODE and the EDC counter
 * registers are read back to clear them.
 *
 * Returns 0 on success or when not applicable (non-Carrizo, ring not
 * ready), negative errno on IB allocation/submission/wait failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        int r, i;
        u32 tmp;
        unsigned total_size, vgpr_offset, sgpr_offset;
        u64 gpu_addr;

        /* only supported on CZ */
        if (adev->asic_type != CHIP_CARRIZO)
                return 0;

        /* bail if the compute ring is not ready */
        if (!ring->sched.ready)
                return 0;

        /* save GB_EDC_MODE and disable EDC while the init shaders run */
        tmp = RREG32(mmGB_EDC_MODE);
        WREG32(mmGB_EDC_MODE, 0);

        /* per dispatch: 3 dwords per reg pair (SET_SH_REG hdr + offset +
         * value), + 4 for PGM_LO/HI, + 5 for DISPATCH_DIRECT, + 2 for the
         * CS-partial-flush EVENT_WRITE; *4 converts dwords to bytes */
        total_size =
                (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        /* shader start addresses must be 256-byte aligned (>> 8 below) */
        total_size = ALIGN(total_size, 256);
        vgpr_offset = total_size;
        total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
        sgpr_offset = total_size;
        total_size += sizeof(sgpr_init_compute_shader);

        /* allocate an indirect buffer to put the commands in */
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, total_size,
                                        AMDGPU_IB_POOL_DIRECT, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
                return r;
        }

        /* load the compute shaders */
        for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
                ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

        for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
                ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

        /* init the ib length to 0 */
        ib.length_dw = 0;

        /* VGPR */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* SGPR1 */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* SGPR2 - same shader, complementary CU mask */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* schedule the ib on the ring */
        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r) {
                DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
                goto fail;
        }

        /* wait for the GPU to finish processing the IB */
        r = dma_fence_wait(f, false);
        if (r) {
                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
                goto fail;
        }

        /* re-enable EDC with DED reporting and FED propagation */
        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
        WREG32(mmGB_EDC_MODE, tmp);

        /* NOTE(review): the trailing "| 1" forces bit 0 on after clearing
         * DIS_EDC - looks intentional but worth confirming against the
         * CC_GC_EDC_CONFIG register spec */
        tmp = RREG32(mmCC_GC_EDC_CONFIG);
        tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
        WREG32(mmCC_GC_EDC_CONFIG, tmp);


        /* read back registers to clear the counters */
        for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
                RREG32(sec_ded_counter_registers[i]);

fail:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);

        return r;
}
1691
/*
 * gfx_v8_0_gpu_early_init - populate adev->gfx.config with per-ASIC
 * shader-engine/pipe/CU limits and FIFO sizes, then derive memory
 * parameters (bank/rank counts, row size) and the final GB_ADDR_CONFIG.
 *
 * Returns 0 on success; on Polaris10/11/12 and VegaM the topology comes
 * from atombios, so the error from amdgpu_atombios_get_gfx_info() is
 * propagated.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
        u32 gb_addr_config;
        u32 mc_arb_ramcfg;
        u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
        u32 tmp;
        int ret;

        /* hard-coded topology per ASIC; Polaris/VegaM query atombios */
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_cu_per_sh = 6;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_FIJI:
                adev->gfx.config.max_shader_engines = 4;
                adev->gfx.config.max_tile_pipes = 16;
                adev->gfx.config.max_cu_per_sh = 16;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 4;
                adev->gfx.config.max_texture_channel_caches = 16;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                /* SE/pipe/CU topology read from the vbios */
                ret = amdgpu_atombios_get_gfx_info(adev);
                if (ret)
                        return ret;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_POLARIS10:
        case CHIP_VEGAM:
                /* SE/pipe/CU topology read from the vbios */
                ret = amdgpu_atombios_get_gfx_info(adev);
                if (ret)
                        return ret;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_TONGA:
                adev->gfx.config.max_shader_engines = 4;
                adev->gfx.config.max_tile_pipes = 8;
                adev->gfx.config.max_cu_per_sh = 8;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 8;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_CARRIZO:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_cu_per_sh = 8;
                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_STONEY:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 1;
                adev->gfx.config.max_cu_per_sh = 3;
                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 16;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
                break;
        default:
                adev->gfx.config.max_shader_engines = 2;
                adev->gfx.config.max_tile_pipes = 4;
                adev->gfx.config.max_cu_per_sh = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 4;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
        mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

        adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
                                MC_ARB_RAMCFG, NOOFBANK);
        adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
                                MC_ARB_RAMCFG, NOOFRANKS);

        adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
        adev->gfx.config.mem_max_burst_length_bytes = 256;
        if (adev->flags & AMD_IS_APU) {
                /* Get memory bank mapping mode. */
                tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
                dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
                dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

                tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
                dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
                dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

                /* Validate settings in case only one DIMM installed. */
                if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
                        dimm00_addr_map = 0;
                if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
                        dimm01_addr_map = 0;
                if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
                        dimm10_addr_map = 0;
                if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
                        dimm11_addr_map = 0;

                /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
                /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
                if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
                        adev->gfx.config.mem_row_size_in_kb = 2;
                else
                        adev->gfx.config.mem_row_size_in_kb = 1;
        } else {
                /* discrete GPU: row size derived from column count, capped at 4KB */
                tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
                adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
                if (adev->gfx.config.mem_row_size_in_kb > 4)
                        adev->gfx.config.mem_row_size_in_kb = 4;
        }

        adev->gfx.config.shader_engine_tile_size = 32;
        adev->gfx.config.num_gpus = 1;
        adev->gfx.config.multi_gpu_tile_size = 64;

        /* fix up row size */
        switch (adev->gfx.config.mem_row_size_in_kb) {
        case 1:
        default:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
                break;
        case 2:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
                break;
        case 4:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
                break;
        }
        adev->gfx.config.gb_addr_config = gb_addr_config;

        return 0;
}
1899
1900 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1901                                         int mec, int pipe, int queue)
1902 {
1903         int r;
1904         unsigned irq_type;
1905         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1906         unsigned int hw_prio;
1907
1908         ring = &adev->gfx.compute_ring[ring_id];
1909
1910         /* mec0 is me1 */
1911         ring->me = mec + 1;
1912         ring->pipe = pipe;
1913         ring->queue = queue;
1914
1915         ring->ring_obj = NULL;
1916         ring->use_doorbell = true;
1917         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1918         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1919                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1920         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1921
1922         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1923                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1924                 + ring->pipe;
1925
1926         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ?
1927                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
1928         /* type-2 packets are deprecated on MEC, use type-3 instead */
1929         r = amdgpu_ring_init(adev, ring, 1024,
1930                              &adev->gfx.eop_irq, irq_type, hw_prio);
1931         if (r)
1932                 return r;
1933
1934
1935         return 0;
1936 }
1937
1938 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1939
1940 static int gfx_v8_0_sw_init(void *handle)
1941 {
1942         int i, j, k, r, ring_id;
1943         struct amdgpu_ring *ring;
1944         struct amdgpu_kiq *kiq;
1945         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1946
1947         switch (adev->asic_type) {
1948         case CHIP_TONGA:
1949         case CHIP_CARRIZO:
1950         case CHIP_FIJI:
1951         case CHIP_POLARIS10:
1952         case CHIP_POLARIS11:
1953         case CHIP_POLARIS12:
1954         case CHIP_VEGAM:
1955                 adev->gfx.mec.num_mec = 2;
1956                 break;
1957         case CHIP_TOPAZ:
1958         case CHIP_STONEY:
1959         default:
1960                 adev->gfx.mec.num_mec = 1;
1961                 break;
1962         }
1963
1964         adev->gfx.mec.num_pipe_per_mec = 4;
1965         adev->gfx.mec.num_queue_per_pipe = 8;
1966
1967         /* EOP Event */
1968         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1969         if (r)
1970                 return r;
1971
1972         /* Privileged reg */
1973         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1974                               &adev->gfx.priv_reg_irq);
1975         if (r)
1976                 return r;
1977
1978         /* Privileged inst */
1979         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1980                               &adev->gfx.priv_inst_irq);
1981         if (r)
1982                 return r;
1983
1984         /* Add CP EDC/ECC irq  */
1985         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1986                               &adev->gfx.cp_ecc_error_irq);
1987         if (r)
1988                 return r;
1989
1990         /* SQ interrupts. */
1991         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1992                               &adev->gfx.sq_irq);
1993         if (r) {
1994                 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1995                 return r;
1996         }
1997
1998         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1999
2000         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2001
2002         gfx_v8_0_scratch_init(adev);
2003
2004         r = gfx_v8_0_init_microcode(adev);
2005         if (r) {
2006                 DRM_ERROR("Failed to load gfx firmware!\n");
2007                 return r;
2008         }
2009
2010         r = adev->gfx.rlc.funcs->init(adev);
2011         if (r) {
2012                 DRM_ERROR("Failed to init rlc BOs!\n");
2013                 return r;
2014         }
2015
2016         r = gfx_v8_0_mec_init(adev);
2017         if (r) {
2018                 DRM_ERROR("Failed to init MEC BOs!\n");
2019                 return r;
2020         }
2021
2022         /* set up the gfx ring */
2023         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2024                 ring = &adev->gfx.gfx_ring[i];
2025                 ring->ring_obj = NULL;
2026                 sprintf(ring->name, "gfx");
2027                 /* no gfx doorbells on iceland */
2028                 if (adev->asic_type != CHIP_TOPAZ) {
2029                         ring->use_doorbell = true;
2030                         ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2031                 }
2032
2033                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2034                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2035                                      AMDGPU_RING_PRIO_DEFAULT);
2036                 if (r)
2037                         return r;
2038         }
2039
2040
2041         /* set up the compute queues - allocate horizontally across pipes */
2042         ring_id = 0;
2043         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2044                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2045                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2046                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2047                                         continue;
2048
2049                                 r = gfx_v8_0_compute_ring_init(adev,
2050                                                                 ring_id,
2051                                                                 i, k, j);
2052                                 if (r)
2053                                         return r;
2054
2055                                 ring_id++;
2056                         }
2057                 }
2058         }
2059
2060         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2061         if (r) {
2062                 DRM_ERROR("Failed to init KIQ BOs!\n");
2063                 return r;
2064         }
2065
2066         kiq = &adev->gfx.kiq;
2067         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2068         if (r)
2069                 return r;
2070
2071         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2072         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2073         if (r)
2074                 return r;
2075
2076         adev->gfx.ce_ram_size = 0x8000;
2077
2078         r = gfx_v8_0_gpu_early_init(adev);
2079         if (r)
2080                 return r;
2081
2082         return 0;
2083 }
2084
/*
 * gfx_v8_0_sw_fini - software teardown for the GFX v8 IP block
 *
 * @handle: amdgpu_device pointer passed as an opaque IP-block handle
 *
 * Releases everything gfx_v8_0_sw_init() created, in reverse dependency
 * order: rings first, then the MQD backing store, the KIQ ring/BOs, the
 * MEC HPD BOs, the RLC state, and finally the firmware images.
 *
 * Returns 0 (this teardown path reports no failures).
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	/* Tear down all gfx and compute rings allocated in sw_init. */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	/* Free MQD backing memory before dismantling the KIQ itself. */
	amdgpu_gfx_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	amdgpu_gfx_rlc_fini(adev);
	/* Release the RLC clear-state BO and its CPU/GPU mappings. */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/*
	 * The RLC CP jump table BO is only allocated on Carrizo/Stoney
	 * (presumably by the APU-specific init path — confirmed only by
	 * this matching conditional free here).
	 */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	/* Drop references to the loaded gfx firmware blobs last. */
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2114
2115 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2116 {
2117         uint32_t *modearray, *mod2array;
2118         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2119         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2120         u32 reg_offset;
2121
2122         modearray = adev->gfx.config.tile_mode_array;
2123         mod2array = adev->gfx.config.macrotile_mode_array;
2124
2125         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2126                 modearray[reg_offset] = 0;
2127
2128         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2129                 mod2array[reg_offset] = 0;
2130
2131         switch (adev->asic_type) {
2132         case CHIP_TOPAZ:
2133                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2134                                 PIPE_CONFIG(ADDR_SURF_P2) |
2135                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2136                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2137                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2138                                 PIPE_CONFIG(ADDR_SURF_P2) |
2139                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2140                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2141                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2142                                 PIPE_CONFIG(ADDR_SURF_P2) |
2143                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2144                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2145                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2146                                 PIPE_CONFIG(ADDR_SURF_P2) |
2147                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2148                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2149                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2150                                 PIPE_CONFIG(ADDR_SURF_P2) |
2151                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2152                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2153                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2154                                 PIPE_CONFIG(ADDR_SURF_P2) |
2155                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2156                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2157                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2158                                 PIPE_CONFIG(ADDR_SURF_P2) |
2159                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2160                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2161                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2162                                 PIPE_CONFIG(ADDR_SURF_P2));
2163                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2164                                 PIPE_CONFIG(ADDR_SURF_P2) |
2165                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2166                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2167                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2168                                  PIPE_CONFIG(ADDR_SURF_P2) |
2169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2171                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2172                                  PIPE_CONFIG(ADDR_SURF_P2) |
2173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2175                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2176                                  PIPE_CONFIG(ADDR_SURF_P2) |
2177                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2178                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2179                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2180                                  PIPE_CONFIG(ADDR_SURF_P2) |
2181                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2182                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2183                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2184                                  PIPE_CONFIG(ADDR_SURF_P2) |
2185                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2186                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2187                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2188                                  PIPE_CONFIG(ADDR_SURF_P2) |
2189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2191                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2192                                  PIPE_CONFIG(ADDR_SURF_P2) |
2193                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2194                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2195                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2196                                  PIPE_CONFIG(ADDR_SURF_P2) |
2197                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2198                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2199                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2200                                  PIPE_CONFIG(ADDR_SURF_P2) |
2201                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2202                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2203                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2204                                  PIPE_CONFIG(ADDR_SURF_P2) |
2205                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2206                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2207                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2208                                  PIPE_CONFIG(ADDR_SURF_P2) |
2209                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2210                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2211                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2212                                  PIPE_CONFIG(ADDR_SURF_P2) |
2213                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2214                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2215                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2216                                  PIPE_CONFIG(ADDR_SURF_P2) |
2217                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2218                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2219                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2220                                  PIPE_CONFIG(ADDR_SURF_P2) |
2221                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2222                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2223                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2224                                  PIPE_CONFIG(ADDR_SURF_P2) |
2225                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2226                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2227                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2228                                  PIPE_CONFIG(ADDR_SURF_P2) |
2229                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2230                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2231                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2232                                  PIPE_CONFIG(ADDR_SURF_P2) |
2233                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2234                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2235
2236                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2237                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2238                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2239                                 NUM_BANKS(ADDR_SURF_8_BANK));
2240                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2241                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2242                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2243                                 NUM_BANKS(ADDR_SURF_8_BANK));
2244                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2245                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2246                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2247                                 NUM_BANKS(ADDR_SURF_8_BANK));
2248                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2249                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2250                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2251                                 NUM_BANKS(ADDR_SURF_8_BANK));
2252                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2253                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2254                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2255                                 NUM_BANKS(ADDR_SURF_8_BANK));
2256                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2257                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2258                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2259                                 NUM_BANKS(ADDR_SURF_8_BANK));
2260                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2261                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2262                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2263                                 NUM_BANKS(ADDR_SURF_8_BANK));
2264                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2265                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2266                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2267                                 NUM_BANKS(ADDR_SURF_16_BANK));
2268                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2269                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2270                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2271                                 NUM_BANKS(ADDR_SURF_16_BANK));
2272                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2273                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2274                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2275                                  NUM_BANKS(ADDR_SURF_16_BANK));
2276                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2277                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2278                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2279                                  NUM_BANKS(ADDR_SURF_16_BANK));
2280                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2281                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2282                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2283                                  NUM_BANKS(ADDR_SURF_16_BANK));
2284                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2286                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2287                                  NUM_BANKS(ADDR_SURF_16_BANK));
2288                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2289                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2290                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2291                                  NUM_BANKS(ADDR_SURF_8_BANK));
2292
2293                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2294                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2295                             reg_offset != 23)
2296                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2297
2298                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2299                         if (reg_offset != 7)
2300                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2301
2302                 break;
2303         case CHIP_FIJI:
2304         case CHIP_VEGAM:
2305                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2308                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2309                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2310                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2312                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2313                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2316                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2317                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2320                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2321                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2324                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2325                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2326                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2328                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2329                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2330                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2332                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2333                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2334                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2335                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2336                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2337                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2338                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2339                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2340                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2342                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2343                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2344                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2347                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2348                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2351                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2352                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2355                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2356                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2364                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2368                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2371                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2372                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2375                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2376                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2378                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2379                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2380                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2382                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2383                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2384                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2386                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2387                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2388                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2390                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2391                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2392                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2394                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2395                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2396                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2397                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2398                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2399                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2400                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2402                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2403                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2404                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2406                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2407                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2408                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2410                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2411                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2412                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2414                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2416                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2418                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2419                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2420                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2422                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2423                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2425                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2426                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2427
2428                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2430                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2431                                 NUM_BANKS(ADDR_SURF_8_BANK));
2432                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2433                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2434                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2435                                 NUM_BANKS(ADDR_SURF_8_BANK));
2436                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2438                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2439                                 NUM_BANKS(ADDR_SURF_8_BANK));
2440                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2442                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443                                 NUM_BANKS(ADDR_SURF_8_BANK));
2444                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2446                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447                                 NUM_BANKS(ADDR_SURF_8_BANK));
2448                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2451                                 NUM_BANKS(ADDR_SURF_8_BANK));
2452                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455                                 NUM_BANKS(ADDR_SURF_8_BANK));
2456                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2458                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2459                                 NUM_BANKS(ADDR_SURF_8_BANK));
2460                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2462                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2463                                 NUM_BANKS(ADDR_SURF_8_BANK));
2464                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2466                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467                                  NUM_BANKS(ADDR_SURF_8_BANK));
2468                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471                                  NUM_BANKS(ADDR_SURF_8_BANK));
2472                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2474                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2475                                  NUM_BANKS(ADDR_SURF_8_BANK));
2476                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479                                  NUM_BANKS(ADDR_SURF_8_BANK));
2480                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                                  NUM_BANKS(ADDR_SURF_4_BANK));
2484
2485                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2486                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2487
2488                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2489                         if (reg_offset != 7)
2490                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2491
2492                 break;
2493         case CHIP_TONGA:
                /*
                 * Tonga GB_TILE_MODE0..30 table (P8_32x32_16x16 pipe config
                 * for most entries):
                 *   0-7   depth micro-tiling, tile split 64B..2KB
                 *   8     linear aligned
                 *   9-12  display micro-tiling
                 *   13-17 thin micro-tiling
                 *   18-26 thick/xthick 1D/2D/3D variants
                 *   27-30 rotated micro-tiling
                 * PRT entries at the end of each group use P4_16x16.
                 */
2494                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2496                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2497                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2498                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2500                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2501                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2502                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2503                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2504                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2505                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2506                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2507                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2508                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2509                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2510                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2511                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2513                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2514                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2515                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2517                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2518                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2519                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2521                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2522                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2523                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2524                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2525                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2526                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2527                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2528                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2529                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2531                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2532                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2535                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2536                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2537                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2539                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2540                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2542                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2543                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2544                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2545                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2547                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2551                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2553                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2555                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2556                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2557                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2559                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2560                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2561                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2562                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2563                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2564                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2565                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2567                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2568                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2569                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2570                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2571                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2572                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2573                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2575                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2576                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2577                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2579                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2580                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2581                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2583                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2584                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2585                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2586                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2587                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2588                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2589                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2591                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2592                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2593                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2594                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2595                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2596                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2597                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2599                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2600                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2601                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2602                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2603                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2604                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2607                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2608                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2609                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2611                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2612                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2613                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2614                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2615                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2616
                /*
                 * Tonga GB_MACROTILE_MODE0..14 table: bank width/height,
                 * macro-tile aspect and bank count per entry.  Index 7 is
                 * intentionally left unprogrammed and is skipped by the
                 * write loop below.
                 */
2617                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2618                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2619                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2620                                 NUM_BANKS(ADDR_SURF_16_BANK));
2621                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2622                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2623                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2624                                 NUM_BANKS(ADDR_SURF_16_BANK));
2625                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2626                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2627                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2628                                 NUM_BANKS(ADDR_SURF_16_BANK));
2629                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2631                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2632                                 NUM_BANKS(ADDR_SURF_16_BANK));
2633                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2635                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2636                                 NUM_BANKS(ADDR_SURF_16_BANK));
2637                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2639                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2640                                 NUM_BANKS(ADDR_SURF_16_BANK));
2641                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2642                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2643                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2644                                 NUM_BANKS(ADDR_SURF_16_BANK));
                /* mod2array[7] is deliberately not set (reserved entry) */
2645                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2647                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2648                                 NUM_BANKS(ADDR_SURF_16_BANK));
2649                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2651                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2652                                 NUM_BANKS(ADDR_SURF_16_BANK));
2653                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2654                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2655                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2656                                  NUM_BANKS(ADDR_SURF_16_BANK));
2657                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2658                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2659                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2660                                  NUM_BANKS(ADDR_SURF_16_BANK));
2661                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2662                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2663                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2664                                  NUM_BANKS(ADDR_SURF_8_BANK));
2665                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2666                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2667                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2668                                  NUM_BANKS(ADDR_SURF_4_BANK));
2669                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2671                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2672                                  NUM_BANKS(ADDR_SURF_4_BANK));
2673
                /* Commit both tables to the hardware registers. */
2674                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2675                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2676
                /* Skip index 7: mod2array[7] was never initialized above. */
2677                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2678                         if (reg_offset != 7)
2679                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2680
2681                 break;
2682         case CHIP_POLARIS11:
2683         case CHIP_POLARIS12:
                /*
                 * Polaris11/12 GB_TILE_MODE0..30 table.  Same entry layout
                 * as the other VI parts (0-7 depth micro-tiling with
                 * increasing tile splits, 8 linear, 9-12 display, 13-17
                 * thin, 18-26 thick/xthick, 27-30 rotated), but every
                 * entry uses the P4_16x16 pipe config.
                 */
2684                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2686                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2687                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2688                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2690                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2691                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2692                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2695                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2696                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2697                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2699                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2700                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2703                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2704                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2707                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2708                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2711                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2712                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2714                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2715                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2716                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2717                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2718                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2719                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2721                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2723                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2725                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2729                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2730                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2731                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2733                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2734                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2735                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2737                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2738                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2739                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2741                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2743                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2744                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2745                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2747                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2748                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2749                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2750                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2751                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2753                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2754                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2755                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2756                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2757                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2758                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2759                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2760                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2761                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2762                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2763                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2765                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2766                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2767                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2768                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2769                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2770                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2771                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2773                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2774                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2775                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2777                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2778                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2779                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2780                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2781                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2782                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2783                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2785                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2786                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2787                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2789                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2790                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2791                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2792                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2793                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2794                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2795                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2796                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2797                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2798                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2799                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2801                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2802                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2803                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2806
                /*
                 * Polaris11/12 GB_MACROTILE_MODE0..14 table.  Index 7 is
                 * intentionally left unprogrammed and is skipped by the
                 * write loop below.
                 */
2807                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2809                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2810                                 NUM_BANKS(ADDR_SURF_16_BANK));
2811
2812                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2814                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2815                                 NUM_BANKS(ADDR_SURF_16_BANK));
2816
2817                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820                                 NUM_BANKS(ADDR_SURF_16_BANK));
2821
2822                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2824                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2825                                 NUM_BANKS(ADDR_SURF_16_BANK));
2826
2827                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2828                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2829                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2830                                 NUM_BANKS(ADDR_SURF_16_BANK));
2831
2832                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836
2837                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840                                 NUM_BANKS(ADDR_SURF_16_BANK));
2841
                /* mod2array[7] is deliberately not set (reserved entry) */
2842                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2843                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2844                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2845                                 NUM_BANKS(ADDR_SURF_16_BANK));
2846
2847                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2848                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2849                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2850                                 NUM_BANKS(ADDR_SURF_16_BANK));
2851
2852                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2855                                 NUM_BANKS(ADDR_SURF_16_BANK));
2856
2857                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860                                 NUM_BANKS(ADDR_SURF_16_BANK));
2861
2862                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2863                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2864                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2865                                 NUM_BANKS(ADDR_SURF_16_BANK));
2866
2867                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2868                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2869                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2870                                 NUM_BANKS(ADDR_SURF_8_BANK));
2871
2872                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2874                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2875                                 NUM_BANKS(ADDR_SURF_4_BANK));
2876
                /* Commit both tables to the hardware registers. */
2877                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2878                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2879
                /* Skip index 7: mod2array[7] was never initialized above. */
2880                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2881                         if (reg_offset != 7)
2882                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2883
2884                 break;
2885         case CHIP_POLARIS10:
2886                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2887                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2888                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2889                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2890                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2891                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2892                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2893                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2894                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2895                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2896                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2897                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2898                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2900                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2901                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2902                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2903                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2904                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2905                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2906                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2907                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2908                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2909                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2910                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2911                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2912                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2913                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2914                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2915                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2916                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2917                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2918                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2919                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2920                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2921                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2924                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2925                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2926                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2928                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2929                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2930                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2931                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2932                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2933                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2934                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2935                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2936                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2937                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2939                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2941                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2943                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2945                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2946                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2947                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2948                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2949                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2950                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2951                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2952                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2953                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2954                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2955                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2956                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2957                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2958                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2959                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2960                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2961                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2962                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2963                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2964                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2965                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2966                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2967                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2968                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2969                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2970                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2971                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2972                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2973                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2974                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2975                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2976                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2977                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2978                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2979                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2980                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2981                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2982                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2983                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2984                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2985                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2986                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2987                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2988                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2989                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2990                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2991                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2992                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2993                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2994                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2995                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2996                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2997                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2998                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2999                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3000                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3001                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3002                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3003                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3004                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3005                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3006                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3007                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3008
3009                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3011                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3012                                 NUM_BANKS(ADDR_SURF_16_BANK));
3013
3014                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3015                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3016                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3017                                 NUM_BANKS(ADDR_SURF_16_BANK));
3018
3019                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3021                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022                                 NUM_BANKS(ADDR_SURF_16_BANK));
3023
3024                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3026                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027                                 NUM_BANKS(ADDR_SURF_16_BANK));
3028
3029                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3031                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3032                                 NUM_BANKS(ADDR_SURF_16_BANK));
3033
3034                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3036                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3037                                 NUM_BANKS(ADDR_SURF_16_BANK));
3038
3039                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3041                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3042                                 NUM_BANKS(ADDR_SURF_16_BANK));
3043
3044                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3046                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3047                                 NUM_BANKS(ADDR_SURF_16_BANK));
3048
3049                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3051                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3052                                 NUM_BANKS(ADDR_SURF_16_BANK));
3053
3054                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3056                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3057                                 NUM_BANKS(ADDR_SURF_16_BANK));
3058
3059                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3061                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3062                                 NUM_BANKS(ADDR_SURF_16_BANK));
3063
3064                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3065                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3066                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3067                                 NUM_BANKS(ADDR_SURF_8_BANK));
3068
3069                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3071                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3072                                 NUM_BANKS(ADDR_SURF_4_BANK));
3073
3074                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3077                                 NUM_BANKS(ADDR_SURF_4_BANK));
3078
3079                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3080                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3081
                /*
                 * mod2array[7] is never initialized above (the table jumps
                 * from index 6 straight to index 8), so macrotile mode
                 * register 7 is deliberately left unprogrammed here.
                 */
3082                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3083                         if (reg_offset != 7)
3084                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3085
3086                 break;
3087         case CHIP_STONEY:
3088                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3089                                 PIPE_CONFIG(ADDR_SURF_P2) |
3090                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3091                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3092                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3093                                 PIPE_CONFIG(ADDR_SURF_P2) |
3094                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3095                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3096                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3097                                 PIPE_CONFIG(ADDR_SURF_P2) |
3098                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3099                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3100                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3101                                 PIPE_CONFIG(ADDR_SURF_P2) |
3102                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3103                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3104                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3105                                 PIPE_CONFIG(ADDR_SURF_P2) |
3106                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3107                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3108                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3109                                 PIPE_CONFIG(ADDR_SURF_P2) |
3110                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3111                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3112                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3113                                 PIPE_CONFIG(ADDR_SURF_P2) |
3114                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3115                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3116                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3117                                 PIPE_CONFIG(ADDR_SURF_P2));
3118                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3119                                 PIPE_CONFIG(ADDR_SURF_P2) |
3120                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3121                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3122                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3123                                  PIPE_CONFIG(ADDR_SURF_P2) |
3124                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3125                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3126                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3127                                  PIPE_CONFIG(ADDR_SURF_P2) |
3128                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3129                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3130                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3131                                  PIPE_CONFIG(ADDR_SURF_P2) |
3132                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3133                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3134                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3135                                  PIPE_CONFIG(ADDR_SURF_P2) |
3136                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3137                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3138                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3139                                  PIPE_CONFIG(ADDR_SURF_P2) |
3140                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3141                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3142                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3143                                  PIPE_CONFIG(ADDR_SURF_P2) |
3144                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3145                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3146                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3147                                  PIPE_CONFIG(ADDR_SURF_P2) |
3148                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3149                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3150                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3151                                  PIPE_CONFIG(ADDR_SURF_P2) |
3152                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3153                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3154                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3155                                  PIPE_CONFIG(ADDR_SURF_P2) |
3156                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3157                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3158                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3159                                  PIPE_CONFIG(ADDR_SURF_P2) |
3160                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3161                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3162                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3163                                  PIPE_CONFIG(ADDR_SURF_P2) |
3164                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3165                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3166                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3167                                  PIPE_CONFIG(ADDR_SURF_P2) |
3168                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3169                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3170                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3171                                  PIPE_CONFIG(ADDR_SURF_P2) |
3172                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3173                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3174                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3175                                  PIPE_CONFIG(ADDR_SURF_P2) |
3176                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3177                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3178                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3179                                  PIPE_CONFIG(ADDR_SURF_P2) |
3180                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3181                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3182                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3183                                  PIPE_CONFIG(ADDR_SURF_P2) |
3184                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3185                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3186                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3187                                  PIPE_CONFIG(ADDR_SURF_P2) |
3188                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3189                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3190
3191                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3192                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3193                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3194                                 NUM_BANKS(ADDR_SURF_8_BANK));
3195                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3196                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3197                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3198                                 NUM_BANKS(ADDR_SURF_8_BANK));
3199                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3200                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3201                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3202                                 NUM_BANKS(ADDR_SURF_8_BANK));
3203                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3204                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3205                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3206                                 NUM_BANKS(ADDR_SURF_8_BANK));
3207                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3208                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3209                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3210                                 NUM_BANKS(ADDR_SURF_8_BANK));
3211                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3212                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3213                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3214                                 NUM_BANKS(ADDR_SURF_8_BANK));
3215                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3216                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3217                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3218                                 NUM_BANKS(ADDR_SURF_8_BANK));
3219                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3220                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3221                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3222                                 NUM_BANKS(ADDR_SURF_16_BANK));
3223                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3224                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3225                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3226                                 NUM_BANKS(ADDR_SURF_16_BANK));
3227                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3228                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3229                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3230                                  NUM_BANKS(ADDR_SURF_16_BANK));
3231                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3232                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3233                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3234                                  NUM_BANKS(ADDR_SURF_16_BANK));
3235                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3236                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3237                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3238                                  NUM_BANKS(ADDR_SURF_16_BANK));
3239                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3240                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3241                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3242                                  NUM_BANKS(ADDR_SURF_16_BANK));
3243                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3244                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3245                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3246                                  NUM_BANKS(ADDR_SURF_8_BANK));
3247
3248                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
                        /*
                         * Tile modes 7, 12, 17 and 23 have no modearray entry
                         * in the CHIP_STONEY table above (those indices are
                         * skipped), so the matching registers are not written.
                         */
3249                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3250                             reg_offset != 23)
3251                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3252
                /* mod2array[7] is likewise never initialized, so skip it. */
3253                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3254                         if (reg_offset != 7)
3255                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3256
3257                 break;
3258         default:
                /*
                 * Unrecognized ASICs fall through and get programmed with the
                 * CHIP_CARRIZO tables below; warn so the missing switch case
                 * is noticed rather than silently papered over.
                 */
3259                 dev_warn(adev->dev,
3260                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3261                          adev->asic_type);
3262                 fallthrough;
3263
3264         case CHIP_CARRIZO:
3265                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3266                                 PIPE_CONFIG(ADDR_SURF_P2) |
3267                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3269                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3270                                 PIPE_CONFIG(ADDR_SURF_P2) |
3271                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3272                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3273                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3274                                 PIPE_CONFIG(ADDR_SURF_P2) |
3275                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3276                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3277                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3278                                 PIPE_CONFIG(ADDR_SURF_P2) |
3279                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3280                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3281                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3282                                 PIPE_CONFIG(ADDR_SURF_P2) |
3283                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3284                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3285                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3286                                 PIPE_CONFIG(ADDR_SURF_P2) |
3287                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3288                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3289                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3290                                 PIPE_CONFIG(ADDR_SURF_P2) |
3291                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3292                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3293                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3294                                 PIPE_CONFIG(ADDR_SURF_P2));
3295                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3296                                 PIPE_CONFIG(ADDR_SURF_P2) |
3297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3298                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3299                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3300                                  PIPE_CONFIG(ADDR_SURF_P2) |
3301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3303                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3304                                  PIPE_CONFIG(ADDR_SURF_P2) |
3305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3307                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3308                                  PIPE_CONFIG(ADDR_SURF_P2) |
3309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3311                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3312                                  PIPE_CONFIG(ADDR_SURF_P2) |
3313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3315                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3316                                  PIPE_CONFIG(ADDR_SURF_P2) |
3317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3320                                  PIPE_CONFIG(ADDR_SURF_P2) |
3321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3323                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3324                                  PIPE_CONFIG(ADDR_SURF_P2) |
3325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3327                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3328                                  PIPE_CONFIG(ADDR_SURF_P2) |
3329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3331                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3332                                  PIPE_CONFIG(ADDR_SURF_P2) |
3333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3335                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3336                                  PIPE_CONFIG(ADDR_SURF_P2) |
3337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3339                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3340                                  PIPE_CONFIG(ADDR_SURF_P2) |
3341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3343                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3344                                  PIPE_CONFIG(ADDR_SURF_P2) |
3345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3348                                  PIPE_CONFIG(ADDR_SURF_P2) |
3349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3351                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3352                                  PIPE_CONFIG(ADDR_SURF_P2) |
3353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3355                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3356                                  PIPE_CONFIG(ADDR_SURF_P2) |
3357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3359                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3360                                  PIPE_CONFIG(ADDR_SURF_P2) |
3361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3363                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3364                                  PIPE_CONFIG(ADDR_SURF_P2) |
3365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3367
3368                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3370                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3371                                 NUM_BANKS(ADDR_SURF_8_BANK));
3372                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3373                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3374                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3375                                 NUM_BANKS(ADDR_SURF_8_BANK));
3376                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3379                                 NUM_BANKS(ADDR_SURF_8_BANK));
3380                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3381                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3382                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3383                                 NUM_BANKS(ADDR_SURF_8_BANK));
3384                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3386                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3387                                 NUM_BANKS(ADDR_SURF_8_BANK));
3388                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3390                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3391                                 NUM_BANKS(ADDR_SURF_8_BANK));
3392                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3394                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3395                                 NUM_BANKS(ADDR_SURF_8_BANK));
3396                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3397                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3398                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3399                                 NUM_BANKS(ADDR_SURF_16_BANK));
3400                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3401                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3402                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3403                                 NUM_BANKS(ADDR_SURF_16_BANK));
3404                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3405                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3406                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3407                                  NUM_BANKS(ADDR_SURF_16_BANK));
3408                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3409                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3410                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3411                                  NUM_BANKS(ADDR_SURF_16_BANK));
3412                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3413                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3414                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3415                                  NUM_BANKS(ADDR_SURF_16_BANK));
3416                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3417                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3418                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3419                                  NUM_BANKS(ADDR_SURF_16_BANK));
3420                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3421                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3422                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3423                                  NUM_BANKS(ADDR_SURF_8_BANK));
3424
3425                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3426                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3427                             reg_offset != 23)
3428                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3429
3430                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3431                         if (reg_offset != 7)
3432                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3433
3434                 break;
3435         }
3436 }
3437
3438 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3439                                   u32 se_num, u32 sh_num, u32 instance)
3440 {
3441         u32 data;
3442
3443         if (instance == 0xffffffff)
3444                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3445         else
3446                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3447
3448         if (se_num == 0xffffffff)
3449                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3450         else
3451                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3452
3453         if (sh_num == 0xffffffff)
3454                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3455         else
3456                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3457
3458         WREG32(mmGRBM_GFX_INDEX, data);
3459 }
3460
/*
 * gfx_v8_0_select_me_pipe_q - steer SRBM-banked registers to a ME/pipe/queue
 *
 * Thin wrapper over vi_srbm_select(); on VI the me/pipe/queue/vm selection
 * is done through the SRBM rather than GRBM_GFX_INDEX.
 */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q, u32 vm)
{
	vi_srbm_select(adev, me, pipe, q, vm);
}
3466
3467 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3468 {
3469         u32 data, mask;
3470
3471         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3472                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3473
3474         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3475
3476         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3477                                          adev->gfx.config.max_sh_per_se);
3478
3479         return (~data) & mask;
3480 }
3481
3482 static void
3483 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3484 {
3485         switch (adev->asic_type) {
3486         case CHIP_FIJI:
3487         case CHIP_VEGAM:
3488                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3489                           RB_XSEL2(1) | PKR_MAP(2) |
3490                           PKR_XSEL(1) | PKR_YSEL(1) |
3491                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3492                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3493                            SE_PAIR_YSEL(2);
3494                 break;
3495         case CHIP_TONGA:
3496         case CHIP_POLARIS10:
3497                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3498                           SE_XSEL(1) | SE_YSEL(1);
3499                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3500                            SE_PAIR_YSEL(2);
3501                 break;
3502         case CHIP_TOPAZ:
3503         case CHIP_CARRIZO:
3504                 *rconf |= RB_MAP_PKR0(2);
3505                 *rconf1 |= 0x0;
3506                 break;
3507         case CHIP_POLARIS11:
3508         case CHIP_POLARIS12:
3509                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3510                           SE_XSEL(1) | SE_YSEL(1);
3511                 *rconf1 |= 0x0;
3512                 break;
3513         case CHIP_STONEY:
3514                 *rconf |= 0x0;
3515                 *rconf1 |= 0x0;
3516                 break;
3517         default:
3518                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3519                 break;
3520         }
3521 }
3522
/*
 * gfx_v8_0_write_harvested_raster_configs - program raster config per SE
 * when some render backends (RBs) are harvested
 *
 * @adev: amdgpu_device pointer
 * @raster_config: baseline PA_SC_RASTER_CONFIG value for a full chip
 * @raster_config_1: baseline PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmask of RBs actually present
 * @num_rb: number of RB pipes the baseline config assumes
 *
 * Walks each shader engine and rewrites the SE/PKR/RB mapping fields so
 * that rasterizer work is only routed to backends present in @rb_mask.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice the global RB mask into per-SE masks (up to 4 SEs). */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If one SE pair is fully harvested, point SE_PAIR_MAP at the live pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* If one SE of the pair is empty, remap SE_MAP to the other. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same remap at packer (PKR) granularity. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Finally, remap individual RBs inside each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3631
/*
 * gfx_v8_0_setup_rb - discover active render backends and program rasterizer
 *
 * @adev: amdgpu_device pointer
 *
 * Scans every SE/SH for active RBs, records the enable mask and count,
 * then writes PA_SC_RASTER_CONFIG(_1) — either the full-chip defaults or,
 * when RBs are harvested, per-SE adjusted values.  Finally caches the
 * per-SE/SH register values for userspace queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	/* grbm_idx_mutex serializes GRBM_GFX_INDEX banking with other users */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* pack each SH's bitmap into its slot of the global mask */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* full complement of RBs (or none readable): broadcast the defaults;
	 * otherwise write harvested-aware per-SE configs */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3688
/**
 * gfx_v8_0_init_compute_vmid - initialize compute VMID SH_MEM registers
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the SH_MEM aperture registers for every KFD-owned VMID and
 * clears their GDS/GWS/OA allocations (firmware enables those for the
 * VMIDs it actually schedules).
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, private
	 * aperture routed through the ATC */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
	}
}
3740
3741 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3742 {
3743         int vmid;
3744
3745         /*
3746          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3747          * access. Compute VMIDs should be enabled by FW for target VMIDs,
3748          * the driver can enable them for graphics. VMID0 should maintain
3749          * access so that HWS firmware can save/restore entries.
3750          */
3751         for (vmid = 1; vmid < 16; vmid++) {
3752                 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3753                 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3754                 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3755                 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3756         }
3757 }
3758
3759 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3760 {
3761         switch (adev->asic_type) {
3762         default:
3763                 adev->gfx.config.double_offchip_lds_buf = 1;
3764                 break;
3765         case CHIP_CARRIZO:
3766         case CHIP_STONEY:
3767                 adev->gfx.config.double_offchip_lds_buf = 0;
3768                 break;
3769         }
3770 }
3771
/*
 * gfx_v8_0_constants_init - program golden gfx constants at init time
 *
 * @adev: amdgpu_device pointer
 *
 * Sets the address config registers, initializes the tiling tables and
 * render backends, programs the per-VMID SH_MEM registers, and writes the
 * broadcast PA_SC FIFO sizes and SPI arbitration priorities.
 */
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID0: uncached default mtype, bases at 0 */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* other VMIDs: non-coherent default, bases at the
			 * shared aperture */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);
	gfx_v8_0_init_gds_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* equal arbitration priority for all four pipe order timestamps */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3854
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for RLC serdes masters to go idle
 *
 * @adev: amdgpu_device pointer
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY per SE/SH (up to adev->usec_timeout
 * microseconds each), then polls the non-CU master busy bits.  On a
 * per-SH timeout it restores broadcast mode, logs, and bails out early.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* restore broadcast before dropping the lock */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* now wait for the global (non-CU) serdes masters */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3892
3893 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3894                                                bool enable)
3895 {
3896         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3897
3898         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3899         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3900         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3901         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3902
3903         WREG32(mmCP_INT_CNTL_RING0, tmp);
3904 }
3905
/*
 * gfx_v8_0_init_csb - point the RLC at the clear state buffer
 *
 * @adev: amdgpu_device pointer
 *
 * Fills the clear-state buffer via the RLC callback, then programs the
 * CSIB address (split hi/lo, low bits dword-aligned) and length registers.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3917
3918 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3919                                 int ind_offset,
3920                                 int list_size,
3921                                 int *unique_indices,
3922                                 int *indices_count,
3923                                 int max_indices,
3924                                 int *ind_start_offsets,
3925                                 int *offset_count,
3926                                 int max_offset)
3927 {
3928         int indices;
3929         bool new_entry = true;
3930
3931         for (; ind_offset < list_size; ind_offset++) {
3932
3933                 if (new_entry) {
3934                         new_entry = false;
3935                         ind_start_offsets[*offset_count] = ind_offset;
3936                         *offset_count = *offset_count + 1;
3937                         BUG_ON(*offset_count >= max_offset);
3938                 }
3939
3940                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3941                         new_entry = true;
3942                         continue;
3943                 }
3944
3945                 ind_offset += 2;
3946
3947                 /* look for the matching indice */
3948                 for (indices = 0;
3949                         indices < *indices_count;
3950                         indices++) {
3951                         if (unique_indices[indices] ==
3952                                 register_list_format[ind_offset])
3953                                 break;
3954                 }
3955
3956                 if (indices >= *indices_count) {
3957                         unique_indices[*indices_count] =
3958                                 register_list_format[ind_offset];
3959                         indices = *indices_count;
3960                         *indices_count = *indices_count + 1;
3961                         BUG_ON(*indices_count >= max_indices);
3962                 }
3963
3964                 register_list_format[ind_offset] = indices;
3965         }
3966 }
3967
/*
 * gfx_v8_0_init_save_restore_list - upload the RLC register save/restore list
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the indirect register list from the RLC firmware image, then
 * programs the direct list into SRM ARAM, the indirect list and its start
 * offsets into GPM scratch, and the deduplicated index registers into the
 * SRM index control registers.
 *
 * Returns 0 on success, -ENOMEM if the working copy of the format list
 * cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parsing rewrites index entries in place */
	unsigned int *register_list_format =
		kmemdup(adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* half the restore-list dword count is written as the list size */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			/* low 18 bits go to the address register, bits from
			 * 20 up to the data register.  NOTE(review): bits
			 * 18-19 are dropped — confirm against the index
			 * encoding that this is intentional. */
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4030
/*
 * Turn on the RLC save/restore machine (SRM).  The save/restore lists must
 * already have been programmed via gfx_v8_0_init_save_restore_list().
 */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4035
4036 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4037 {
4038         uint32_t data;
4039
4040         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4041
4042         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4043         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4044         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4045         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4046         WREG32(mmRLC_PG_DELAY, data);
4047
4048         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4049         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4050
4051 }
4052
/* Enable/disable SMU clock slow-down while a block powers up (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
                                                bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4058
/* Enable/disable SMU clock slow-down while a block powers down (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
                                                  bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4064
/* Enable/disable CP power gating.  Note the field is a DISABLE bit, so the
 * polarity is inverted: enable==true clears CP_PG_DISABLE. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4069
4070 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4071 {
4072         if ((adev->asic_type == CHIP_CARRIZO) ||
4073             (adev->asic_type == CHIP_STONEY)) {
4074                 gfx_v8_0_init_csb(adev);
4075                 gfx_v8_0_init_save_restore_list(adev);
4076                 gfx_v8_0_enable_save_restore_machine(adev);
4077                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4078                 gfx_v8_0_init_power_gating(adev);
4079                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4080         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4081                    (adev->asic_type == CHIP_POLARIS12) ||
4082                    (adev->asic_type == CHIP_VEGAM)) {
4083                 gfx_v8_0_init_csb(adev);
4084                 gfx_v8_0_init_save_restore_list(adev);
4085                 gfx_v8_0_enable_save_restore_machine(adev);
4086                 gfx_v8_0_init_power_gating(adev);
4087         }
4088
4089 }
4090
/* Halt the RLC F32 core, mask GUI idle interrupts and wait for any
 * outstanding RLC serdes commands to drain. */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

        gfx_v8_0_enable_gui_idle_interrupt(adev, false);
        gfx_v8_0_wait_for_rlc_serdes(adev);
}
4098
/* Pulse the GRBM soft-reset line for the RLC block, with settle delays
 * on both the assert and the release. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
        udelay(50);

        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
        udelay(50);
}
4107
/* Release the RLC F32 core and (on dGPUs only) re-enable the GUI idle
 * interrupt; APUs defer that until after the CP is initialized. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

        /* carrizo do enable cp interrupt after cp inited */
        if (!(adev->flags & AMD_IS_APU))
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);

        udelay(50);
}
4118
/*
 * gfx_v8_0_rlc_resume - full RLC restart: stop, soft-reset, reprogram
 * power gating, then start.  Under SR-IOV only the clear-state buffer is
 * (re)initialized — the host owns the rest of the RLC bring-up.
 *
 * Always returns 0.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
        if (amdgpu_sriov_vf(adev)) {
                gfx_v8_0_init_csb(adev);
                return 0;
        }

        adev->gfx.rlc.funcs->stop(adev);
        adev->gfx.rlc.funcs->reset(adev);
        gfx_v8_0_init_pg(adev);
        adev->gfx.rlc.funcs->start(adev);

        return 0;
}
4133
4134 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4135 {
4136         u32 tmp = RREG32(mmCP_ME_CNTL);
4137
4138         if (enable) {
4139                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4140                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4141                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4142         } else {
4143                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4144                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4145                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4146         }
4147         WREG32(mmCP_ME_CNTL, tmp);
4148         udelay(50);
4149 }
4150
4151 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4152 {
4153         u32 count = 0;
4154         const struct cs_section_def *sect = NULL;
4155         const struct cs_extent_def *ext = NULL;
4156
4157         /* begin clear state */
4158         count += 2;
4159         /* context control state */
4160         count += 3;
4161
4162         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4163                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4164                         if (sect->id == SECT_CONTEXT)
4165                                 count += 2 + ext->reg_count;
4166                         else
4167                                 return 0;
4168                 }
4169         }
4170         /* pa_sc_raster_config/pa_sc_raster_config1 */
4171         count += 4;
4172         /* end clear state */
4173         count += 2;
4174         /* clear state */
4175         count += 2;
4176
4177         return count;
4178 }
4179
/*
 * gfx_v8_0_cp_gfx_start - start the gfx CP and emit the clear-state sequence
 *
 * Programs basic CP config registers, un-halts the gfx micro engines and
 * submits the clear-state buffer (context-register defaults from
 * vi_cs_data) plus CE partition setup on gfx ring 0.  The ring allocation
 * size must stay in sync with gfx_v8_0_get_csb_size().
 *
 * Returns 0 on success or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;
        int r, i;

        /* init the CP */
        WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
        WREG32(mmCP_ENDIAN_SWAP, 0);
        WREG32(mmCP_DEVICE_ID, 1);

        gfx_v8_0_cp_gfx_enable(adev, true);

        /* +4 covers the SET_BASE packet for the CE partitions */
        r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* clear state buffer */
        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, 0x80000000);
        amdgpu_ring_write(ring, 0x80000000);

        /* stream every context-register extent as a SET_CONTEXT_REG packet */
        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                amdgpu_ring_write(ring,
                                       PACKET3(PACKET3_SET_CONTEXT_REG,
                                               ext->reg_count));
                                amdgpu_ring_write(ring,
                                       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        amdgpu_ring_write(ring, ext->extent[i]);
                        }
                }
        }

        /* pa_sc_raster_config/pa_sc_raster_config1 from SE0/SH0 */
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
        amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
        amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        amdgpu_ring_write(ring, 0);

        /* init the CE partitions */
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
        amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
        amdgpu_ring_write(ring, 0x8000);
        amdgpu_ring_write(ring, 0x8000);

        amdgpu_ring_commit(ring);

        return 0;
}
/*
 * gfx_v8_0_set_cpg_door_bell - configure the gfx ring's doorbell
 *
 * Enables (or disables) the CP RB doorbell for @ring and, on dGPUs,
 * programs the doorbell aperture range around the gfx ring 0 index.
 * Iceland (Topaz) has no gfx doorbells at all.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
        u32 tmp;
        /* no gfx doorbells on iceland */
        if (adev->asic_type == CHIP_TOPAZ)
                return;

        tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

        if (ring->use_doorbell) {
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                DOORBELL_OFFSET, ring->doorbell_index);
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                                DOORBELL_HIT, 0);
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                            DOORBELL_EN, 1);
        } else {
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
        }

        WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

        /* the doorbell range registers are dGPU-only */
        if (adev->flags & AMD_IS_APU)
                return;

        tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
                                        DOORBELL_RANGE_LOWER,
                                        adev->doorbell_index.gfx_ring0);
        WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

        WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4276
/*
 * gfx_v8_0_cp_gfx_resume - bring up gfx ring 0
 *
 * Programs the ring buffer size, read/write pointers, writeback
 * addresses, base address and doorbell, then clears the ring and kicks
 * off the clear-state sequence via gfx_v8_0_cp_gfx_start().
 *
 * Always returns 0.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        u32 tmp;
        u32 rb_bufsz;
        u64 rb_addr, rptr_addr, wptr_gpu_addr;

        /* Set the write pointer delay */
        WREG32(mmCP_RB_WPTR_DELAY, 0);

        /* set the RB to use vmid 0 */
        WREG32(mmCP_RB_VMID, 0);

        /* Set ring buffer size */
        ring = &adev->gfx.gfx_ring[0];
        rb_bufsz = order_base_2(ring->ring_size / 8);
        tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
        WREG32(mmCP_RB0_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        /* RPTR_WR_ENA lets the WPTR write below also reset the RPTR */
        WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
        ring->wptr = 0;
        WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

        /* set the wb address wether it's enabled or not */
        rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
        WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

        wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
        WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
        /* let the pointer reset settle before dropping RPTR_WR_ENA */
        mdelay(1);
        WREG32(mmCP_RB0_CNTL, tmp);

        rb_addr = ring->gpu_addr >> 8;
        WREG32(mmCP_RB0_BASE, rb_addr);
        WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

        gfx_v8_0_set_cpg_door_bell(adev, ring);
        /* start the ring */
        amdgpu_ring_clear_ring(ring);
        gfx_v8_0_cp_gfx_start(adev);
        ring->sched.ready = true;

        return 0;
}
4330
4331 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4332 {
4333         if (enable) {
4334                 WREG32(mmCP_MEC_CNTL, 0);
4335         } else {
4336                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4337                 adev->gfx.kiq.ring.sched.ready = false;
4338         }
4339         udelay(50);
4340 }
4341
4342 /* KIQ functions */
/*
 * gfx_v8_0_kiq_setting - tell the RLC which me/pipe/queue is the KIQ
 *
 * The queue id is written first, then re-written with bit 7 (0x80) set;
 * the two-step write sequence appears intentional — presumably an
 * "address then activate" handshake with the RLC scheduler.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
        uint32_t tmp;
        struct amdgpu_device *adev = ring->adev;

        /* tell RLC which is KIQ queue */
        tmp = RREG32(mmRLC_CP_SCHEDULERS);
        tmp &= 0xffffff00;
        tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
        WREG32(mmRLC_CP_SCHEDULERS, tmp);
        tmp |= 0x80;
        WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4356
/*
 * gfx_v8_0_kiq_kcq_enable - map all compute queues through the KIQ
 *
 * Builds a bitmask of usable MEC queues, then submits a SET_RESOURCES
 * packet followed by one MAP_QUEUES packet per compute ring on the KIQ.
 *
 * Returns 0 on success or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
        uint64_t queue_mask = 0;
        int r, i;

        for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
                if (!test_bit(i, adev->gfx.mec.queue_bitmap))
                        continue;

                /* This situation may be hit in the future if a new HW
                 * generation exposes more than 64 queues. If so, the
                 * definition of queue_mask needs updating */
                if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
                        DRM_ERROR("Invalid KCQ enabled: %d\n", i);
                        break;
                }

                queue_mask |= (1ull << i);
        }

        /* 8 dwords per MAP_QUEUES plus 8 for SET_RESOURCES */
        r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
        if (r) {
                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
                return r;
        }
        /* set resources */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
        amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
        amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
                uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
                uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

                /* map queues */
                amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
                /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
                amdgpu_ring_write(kiq_ring,
                                  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
                /* ME field is 0-based relative to the compute MEs (ME1/ME2) */
                amdgpu_ring_write(kiq_ring,
                                  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
                                  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                                  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
                amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
                amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
        }

        amdgpu_ring_commit(kiq_ring);

        return 0;
}
4417
/*
 * gfx_v8_0_deactivate_hqd - dequeue the currently selected HQD
 *
 * Caller must have selected the target queue via vi_srbm_select().  If the
 * HQD is active, issues dequeue request @req and polls up to
 * adev->usec_timeout microseconds for it to go inactive, then clears the
 * dequeue request and PQ pointers unconditionally.
 *
 * Returns 0 on success, -ETIMEDOUT if the HQD stayed active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
        int i, r = 0;

        if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
                WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
                for (i = 0; i < adev->usec_timeout; i++) {
                        if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
                                break;
                        udelay(1);
                }
                if (i == adev->usec_timeout)
                        r = -ETIMEDOUT;
        }
        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
        WREG32(mmCP_HQD_PQ_RPTR, 0);
        WREG32(mmCP_HQD_PQ_WPTR, 0);

        return r;
}
4438
4439 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4440 {
4441         struct amdgpu_device *adev = ring->adev;
4442
4443         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4444                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
4445                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4446                         mqd->cp_hqd_queue_priority =
4447                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4448                 }
4449         }
4450 }
4451
/*
 * gfx_v8_0_mqd_init - fill a VI memory queue descriptor for @ring
 *
 * Populates ring->mqd_ptr with the CP/HQD register values describing the
 * queue: EOP buffer, MQD and PQ base addresses, doorbell setup, writeback
 * addresses and cache-type (MTYPE) settings.  Several fields are seeded
 * from the current HW register values, so the caller must have selected
 * this queue via vi_srbm_select() first.
 *
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
        uint32_t tmp;

        mqd->header = 0xC0310800;
        mqd->compute_pipelinestat_enable = 0x00000001;
        mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
        mqd->compute_misc_reserved = 0x00000003;
        /* point the HW at the dynamic CU mask stored inside the MQD BO */
        mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
                                                     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
        mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
                                                     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
        eop_base_addr = ring->eop_gpu_addr >> 8;
        mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
        mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
        tmp = RREG32(mmCP_HQD_EOP_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
                        (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

        mqd->cp_hqd_eop_control = tmp;

        /* enable doorbell? */
        tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
                            CP_HQD_PQ_DOORBELL_CONTROL,
                            DOORBELL_EN,
                            ring->use_doorbell ? 1 : 0);

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* set the pointer to the MQD */
        mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
        mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

        /* set MQD vmid to 0 */
        tmp = RREG32(mmCP_MQD_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
        mqd->cp_mqd_control = tmp;

        /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
        hqd_gpu_addr = ring->gpu_addr >> 8;
        mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
        mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

        /* set up the HQD, this is similar to CP_RB0_CNTL */
        tmp = RREG32(mmCP_HQD_PQ_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
                            (order_base_2(ring->ring_size / 4) - 1));
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                        ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
        mqd->cp_hqd_pq_control = tmp;

        /* set the wb address whether it's enabled or not */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_rptr_report_addr_hi =
                upper_32_bits(wb_gpu_addr) & 0xffff;

        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

        tmp = 0;
        /* enable the doorbell if requested */
        if (ring->use_doorbell) {
                tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                DOORBELL_OFFSET, ring->doorbell_index);

                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_EN, 1);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_SOURCE, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_HIT, 0);
        }

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
        ring->wptr = 0;
        mqd->cp_hqd_pq_wptr = ring->wptr;
        mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

        /* set the vmid for the queue */
        mqd->cp_hqd_vmid = 0;

        tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
        mqd->cp_hqd_persistent_state = tmp;

        /* set MTYPE */
        tmp = RREG32(mmCP_HQD_IB_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ib_control = tmp;

        tmp = RREG32(mmCP_HQD_IQ_TIMER);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
        mqd->cp_hqd_iq_timer = tmp;

        tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ctx_save_control = tmp;

        /* defaults */
        /* snapshot the remaining HQD state from current register values */
        mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
        mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
        mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
        mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
        mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
        mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
        mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
        mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
        mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
        mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
        mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
        mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

        /* set static priority for a queue/ring */
        gfx_v8_0_mqd_set_priority(ring, mqd);
        mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

        /* map_queues packet doesn't need activate the queue,
         * so only kiq need set this field.
         */
        if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
                mqd->cp_hqd_active = 1;

        return 0;
}
4598
/*
 * gfx_v8_0_mqd_commit - write an initialized MQD into the HQD registers
 *
 * Caller must have selected the target queue via vi_srbm_select().  The
 * MQD fields mirror the register block starting at mmCP_MQD_BASE_ADDR,
 * so each register is written from mqd_data[reg - mmCP_MQD_BASE_ADDR].
 * CP_HQD_ACTIVE is written last, which activates the queue.
 *
 * Always returns 0.
 */
static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
                        struct vi_mqd *mqd)
{
        uint32_t mqd_reg;
        uint32_t *mqd_data;

        /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
        mqd_data = &mqd->cp_mqd_base_addr_lo;

        /* disable wptr polling */
        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

        /* program all HQD registers */
        for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
         * This is safe since EOP RPTR==WPTR for any inactive HQD
         * on ASICs that do not support context-save.
         * EOP writes/reads can start anywhere in the ring.
         */
        if (adev->asic_type != CHIP_TONGA) {
                WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
                WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
                WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
        }

        for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* activate the HQD */
        for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        return 0;
}
4635
/*
 * gfx_v8_0_kiq_init_queue - initialize (or restore) the KIQ's MQD and HQD
 *
 * On GPU reset the MQD is restored from the CPU-side backup and only
 * recommitted to the hardware; on first init the MQD is built from
 * scratch via gfx_v8_0_mqd_init() and then backed up.  The KIQ uses the
 * backup slot at index AMDGPU_MAX_COMPUTE_RINGS.
 *
 * Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

        gfx_v8_0_kiq_setting(ring);

        if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
                /* srbm_select routes the HQD register writes to this queue */
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);
        } else {
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        }

        return 0;
}
4674
/*
 * gfx_v8_0_kcq_init_queue - initialize (or restore) a compute ring's MQD
 *
 * First init builds the MQD from scratch and backs it up; GPU reset
 * restores the backup and clears the ring.  Unlike the KIQ path the MQD
 * is NOT committed to the hardware here — KCQs are mapped later via the
 * KIQ (gfx_v8_0_kiq_kcq_enable()).  Suspend/resume (neither reset nor
 * first init) only clears the ring.
 *
 * Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = ring - &adev->gfx.compute_ring[0];

        if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
        } else {
                amdgpu_ring_clear_ring(ring);
        }
        return 0;
}
4705
/*
 * gfx_v8_0_set_mec_doorbell_range - open the MEC doorbell aperture
 *
 * Programs the doorbell range from the KIQ index through MEC ring 7 on
 * ASICs newer than Tonga, then enables CP doorbell processing for all.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
        if (adev->asic_type > CHIP_TONGA) {
                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
        }
        /* enable doorbells */
        WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4715
4716 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4717 {
4718         struct amdgpu_ring *ring;
4719         int r;
4720
4721         ring = &adev->gfx.kiq.ring;
4722
4723         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4724         if (unlikely(r != 0))
4725                 return r;
4726
4727         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4728         if (unlikely(r != 0))
4729                 return r;
4730
4731         gfx_v8_0_kiq_init_queue(ring);
4732         amdgpu_bo_kunmap(ring->mqd_obj);
4733         ring->mqd_ptr = NULL;
4734         amdgpu_bo_unreserve(ring->mqd_obj);
4735         ring->sched.ready = true;
4736         return 0;
4737 }
4738
4739 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4740 {
4741         struct amdgpu_ring *ring = NULL;
4742         int r = 0, i;
4743
4744         gfx_v8_0_cp_compute_enable(adev, true);
4745
4746         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4747                 ring = &adev->gfx.compute_ring[i];
4748
4749                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4750                 if (unlikely(r != 0))
4751                         goto done;
4752                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4753                 if (!r) {
4754                         r = gfx_v8_0_kcq_init_queue(ring);
4755                         amdgpu_bo_kunmap(ring->mqd_obj);
4756                         ring->mqd_ptr = NULL;
4757                 }
4758                 amdgpu_bo_unreserve(ring->mqd_obj);
4759                 if (r)
4760                         goto done;
4761         }
4762
4763         gfx_v8_0_set_mec_doorbell_range(adev);
4764
4765         r = gfx_v8_0_kiq_kcq_enable(adev);
4766         if (r)
4767                 goto done;
4768
4769 done:
4770         return r;
4771 }
4772
/*
 * Run ring tests on the gfx ring, the KIQ ring and every compute ring.
 *
 * A gfx or KIQ failure aborts with the error code.  Compute ring
 * results are intentionally not propagated: a failing KCQ is
 * best-effort here (presumably amdgpu_ring_test_helper() marks the
 * ring itself as not ready — confirm), so one dead compute queue does
 * not fail the whole resume.
 */
static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	/* collect all the ring_tests here, gfx, kiq, compute */
	ring = &adev->gfx.gfx_ring[0];
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	ring = &adev->gfx.kiq.ring;
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		amdgpu_ring_test_helper(ring);
	}

	return 0;
}
4796
4797 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4798 {
4799         int r;
4800
4801         if (!(adev->flags & AMD_IS_APU))
4802                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4803
4804         r = gfx_v8_0_kiq_resume(adev);
4805         if (r)
4806                 return r;
4807
4808         r = gfx_v8_0_cp_gfx_resume(adev);
4809         if (r)
4810                 return r;
4811
4812         r = gfx_v8_0_kcq_resume(adev);
4813         if (r)
4814                 return r;
4815
4816         r = gfx_v8_0_cp_test_all_rings(adev);
4817         if (r)
4818                 return r;
4819
4820         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4821
4822         return 0;
4823 }
4824
/* Enable or disable both halves of the command processor (gfx + compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4830
4831 static int gfx_v8_0_hw_init(void *handle)
4832 {
4833         int r;
4834         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4835
4836         gfx_v8_0_init_golden_registers(adev);
4837         gfx_v8_0_constants_init(adev);
4838
4839         r = adev->gfx.rlc.funcs->resume(adev);
4840         if (r)
4841                 return r;
4842
4843         r = gfx_v8_0_cp_resume(adev);
4844
4845         return r;
4846 }
4847
/*
 * Tear down all user compute queues by sending one UNMAP_QUEUES packet
 * (action = RESET_QUEUES) per KCQ through the KIQ ring.  Each packet is
 * 6 dwords, hence the 6 * num_compute_rings ring allocation.
 *
 * NOTE(review): on allocation failure the packets are still written
 * below (best effort) — confirm amdgpu_ring_write() tolerates this.
 *
 * Returns the KIQ ring-test result (0 on success).
 */
static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;

	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
	if (r)
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
		/* queue is selected by doorbell offset */
		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
		/* remaining packet dwords unused for this action */
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
	/* the ring test commits the packets and waits for the KIQ to drain */
	r = amdgpu_ring_test_helper(kiq_ring);
	if (r)
		DRM_ERROR("KCQ disable failed\n");

	return r;
}
4877
4878 static bool gfx_v8_0_is_idle(void *handle)
4879 {
4880         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4881
4882         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4883                 || RREG32(mmGRBM_STATUS2) != 0x8)
4884                 return false;
4885         else
4886                 return true;
4887 }
4888
4889 static bool gfx_v8_0_rlc_is_idle(void *handle)
4890 {
4891         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4892
4893         if (RREG32(mmGRBM_STATUS2) != 0x8)
4894                 return false;
4895         else
4896                 return true;
4897 }
4898
4899 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4900 {
4901         unsigned int i;
4902         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4903
4904         for (i = 0; i < adev->usec_timeout; i++) {
4905                 if (gfx_v8_0_rlc_is_idle(handle))
4906                         return 0;
4907
4908                 udelay(1);
4909         }
4910         return -ETIMEDOUT;
4911 }
4912
4913 static int gfx_v8_0_wait_for_idle(void *handle)
4914 {
4915         unsigned int i;
4916         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4917
4918         for (i = 0; i < adev->usec_timeout; i++) {
4919                 if (gfx_v8_0_is_idle(handle))
4920                         return 0;
4921
4922                 udelay(1);
4923         }
4924         return -ETIMEDOUT;
4925 }
4926
/*
 * IP-block hw_fini: quiesce GFX for teardown/suspend.
 * Drops the irq references taken in late_init, unmaps the user compute
 * queues, then (bare metal only) halts CP and RLC once each reports
 * idle — a busy block is left running with an error logged.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	gfx_v8_0_kcq_disable(adev);

	/* under SRIOV the host owns CP/RLC state; don't touch it */
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	amdgpu_gfx_rlc_enter_safe_mode(adev);
	if (!gfx_v8_0_wait_for_idle(adev))
		gfx_v8_0_cp_enable(adev, false);
	else
		pr_err("cp is busy, skip halt cp\n");
	if (!gfx_v8_0_wait_for_rlc_idle(adev))
		adev->gfx.rlc.funcs->stop(adev);
	else
		pr_err("rlc is busy, skip halt rlc\n");
	amdgpu_gfx_rlc_exit_safe_mode(adev);

	return 0;
}
4958
/* Suspend is identical to hw_fini for this IP block. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4963
/* Resume is identical to hw_init for this IP block. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4968
/*
 * Decode the GRBM/SRBM status registers and compute which soft-reset
 * bits would be needed to recover the GFX block.  The masks are cached
 * in adev->gfx.{grbm,srbm}_soft_reset for use by the pre/soft/post
 * reset callbacks.  Returns true if any reset is required.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy pipeline stage -> reset CP, GFX and GRBM */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC busy -> reset RLC */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any CP microengine busy -> reset all of CPF/CPC/CPG plus GRBM */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS: pending GRBM requests or a busy semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5030
/*
 * Prepare for a soft reset computed by gfx_v8_0_check_soft_reset():
 * stop the RLC, halt GFX fetch if GFX/CP is being reset, and for any
 * CP reset deactivate every compute HQD before halting the MEC.
 * No-op when no reset bits were recorded.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	/* stop the rlc */
	adev->gfx.rlc.funcs->stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* deactivate each compute queue under its SRBM context */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
5071
/*
 * Execute the soft reset recorded by gfx_v8_0_check_soft_reset().
 * Sequence: stall the GMCON while GFX is reset, pulse the GRBM then
 * SRBM soft-reset bits (set, settle 50us, clear), then release the
 * GMCON stall.  Each WREG32 of a reset register is followed by a
 * read-back of the same register before proceeding.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall GFX memory traffic during the reset */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5133
/*
 * Recover after a soft reset: re-deactivate the compute HQDs and bring
 * KIQ + KCQs back up if any CP engine was reset, restart the gfx ring
 * if GFX/CP was reset, then re-test all rings and restart the RLC.
 * No-op when no reset bits were recorded.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* make sure every compute HQD is inactive before resume */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* NOTE(review): resume return codes are ignored here; a
		 * failure surfaces via the ring tests below instead */
		gfx_v8_0_kiq_resume(adev);
		gfx_v8_0_kcq_resume(adev);
	}

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	gfx_v8_0_cp_test_all_rings(adev);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
5174
5175 /**
5176  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5177  *
5178  * @adev: amdgpu_device pointer
5179  *
5180  * Fetches a GPU clock counter snapshot.
5181  * Returns the 64 bit clock counter snapshot.
5182  */
5183 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5184 {
5185         uint64_t clock;
5186
5187         mutex_lock(&adev->gfx.gpu_clock_mutex);
5188         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5189         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5190                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5191         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5192         return clock;
5193 }
5194
/*
 * Emit WRITE_DATA packets that program the per-VMID GDS, GWS and OA
 * allocation registers for a context switch on this ring.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base packed into one register */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5233
5234 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5235 {
5236         WREG32(mmSQ_IND_INDEX,
5237                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5238                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5239                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5240                 (SQ_IND_INDEX__FORCE_READ_MASK));
5241         return RREG32(mmSQ_IND_DATA);
5242 }
5243
5244 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5245                            uint32_t wave, uint32_t thread,
5246                            uint32_t regno, uint32_t num, uint32_t *out)
5247 {
5248         WREG32(mmSQ_IND_INDEX,
5249                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5250                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5251                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5252                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5253                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5254                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5255         while (num--)
5256                 *(out++) = RREG32(mmSQ_IND_DATA);
5257 }
5258
5259 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5260 {
5261         /* type 0 wave data */
5262         dst[(*no_fields)++] = 0;
5263         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5264         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5265         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5266         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5267         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5268         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5269         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5270         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5271         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5272         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5273         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5274         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5275         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5276         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5277         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5278         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5279         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5280         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5281 }
5282
/*
 * Read "size" consecutive SGPRs starting at "start" for the given
 * SIMD/wave into dst (thread id 0).
 */
static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
5291
5292
/* GFX helper callbacks installed on adev->gfx.funcs in early_init. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5300
/*
 * IP-block early_init: set ring counts and install the gfx/ring/irq/
 * gds/rlc function tables before any hardware access.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	/* honor the num_kcq module option, capped at the hw maximum */
	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
					  AMDGPU_MAX_COMPUTE_RINGS);
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5316
/*
 * IP-block late_init: enable the GFX interrupt sources and run the EDC
 * GPR workarounds (which need IBs, hence late init).  The references
 * taken here are dropped in gfx_v8_0_hw_fini().
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	return 0;
}
5351
5352 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5353                                                        bool enable)
5354 {
5355         if ((adev->asic_type == CHIP_POLARIS11) ||
5356             (adev->asic_type == CHIP_POLARIS12) ||
5357             (adev->asic_type == CHIP_VEGAM))
5358                 /* Send msg to SMU via Powerplay */
5359                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5360
5361         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5362 }
5363
/* Toggle dynamic per-CU medium-grain power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5369
/* Toggle quick medium-grain power gating (Polaris11 family) via RLC_PG_CNTL. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5375
/* Toggle coarse-grain GFX power gating (Carrizo/Stoney) via RLC_PG_CNTL. */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5381
/* Toggle GFX pipeline power gating (Carrizo/Stoney) via RLC_PG_CNTL. */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5391
5392 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5393                                           bool enable)
5394 {
5395         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5396                 cz_enable_gfx_cg_power_gating(adev, true);
5397                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5398                         cz_enable_gfx_pipeline_power_gating(adev, true);
5399         } else {
5400                 cz_enable_gfx_cg_power_gating(adev, false);
5401                 cz_enable_gfx_pipeline_power_gating(adev, false);
5402         }
5403 }
5404
/*
 * IP-block set_powergating_state: apply the per-ASIC power-gating
 * configuration for AMD_PG_STATE_GATE/UNGATE.  The RLC safe mode
 * bracket is only taken when a PG feature that needs it is supported.
 * No-op under SRIOV (host owns PG state).
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down follows RLC_SMU_HS support, both directions */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_exit_safe_mode(adev);
	return 0;
}
5475
/*
 * IP-block get_clockgating_state: report currently-active GFX
 * clock-gating features by reading the CG control registers and ORing
 * the corresponding AMD_CG_SUPPORT_* bits into *flags.
 *
 * NOTE(review): under SRIOV *flags is zeroed but the register reads
 * below still run and may set bits — confirm the missing early return
 * is intentional.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGLG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5517
/*
 * Broadcast a BPM command over the RLC SERDES write interface.
 * @reg_addr: BPM register address (one of the BPM_REG_* values)
 * @cmd:      command code (e.g. SET_BPM_SERDES_CMD / CLE_BPM_SERDES_CMD)
 *
 * Targets all SEs/SHs and all CU and non-CU masters so the command
 * reaches every per-CU gating control.  Callers are expected to hold
 * RLC safe mode and wait for serdes idle around this (see call sites).
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
                                     uint32_t reg_addr, uint32_t cmd)
{
        uint32_t data;

        /* broadcast: select every shader engine / shader array */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

        data = RREG32(mmRLC_SERDES_WR_CTRL);
        /* Stoney's WR_CTRL layout lacks the BPM_DATA/REG_ADDR fields that
         * are cleared on the other VI parts, hence the two mask sets.
         */
        if (adev->asic_type == CHIP_STONEY)
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        else
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
                          RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        /* encode command + register address, address all BPMs (0xff) */
        data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
                 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
                 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
                 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

        WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5558
/* RLC safe-mode message encoding.
 * NOTE(review): the RLC_GPR_REG2 field definitions below are not
 * referenced by the RLC_SAFE_MODE-based enter/exit paths that follow;
 * they look like leftovers of an older GPR_REG2 mailbox protocol -
 * confirm against the rest of the file before removing.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5565
5566 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5567 {
5568         uint32_t rlc_setting;
5569
5570         rlc_setting = RREG32(mmRLC_CNTL);
5571         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5572                 return false;
5573
5574         return true;
5575 }
5576
/*
 * Request RLC safe mode (MESSAGE=1 + CMD) and wait for the GFX block to
 * report clocks/power on and for the RLC to ack the command.  Must be
 * balanced with gfx_v8_0_unset_safe_mode().
 */
static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
{
        uint32_t data;
        unsigned i;
        /* build the request: CMD bit set, MESSAGE field = 1 (enter) */
        data = RREG32(mmRLC_CNTL);
        data |= RLC_SAFE_MODE__CMD_MASK;
        data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
        data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
        WREG32(mmRLC_SAFE_MODE, data);

        /* wait for RLC_SAFE_MODE */
        for (i = 0; i < adev->usec_timeout; i++) {
                /* both GFX clock and power must report "on" */
                if ((RREG32(mmRLC_GPM_STAT) &
                     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
                      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
                    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
                     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
                        break;
                udelay(1);
        }
        /* RLC clears CMD once it has processed the request */
        for (i = 0; i < adev->usec_timeout; i++) {
                if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
                        break;
                udelay(1);
        }
}
5603
5604 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5605 {
5606         uint32_t data;
5607         unsigned i;
5608
5609         data = RREG32(mmRLC_CNTL);
5610         data |= RLC_SAFE_MODE__CMD_MASK;
5611         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5612         WREG32(mmRLC_SAFE_MODE, data);
5613
5614         for (i = 0; i < adev->usec_timeout; i++) {
5615                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5616                         break;
5617                 udelay(1);
5618         }
5619 }
5620
5621 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5622 {
5623         u32 data;
5624
5625         if (amdgpu_sriov_is_pp_one_vf(adev))
5626                 data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5627         else
5628                 data = RREG32(mmRLC_SPM_VMID);
5629
5630         data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5631         data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5632
5633         if (amdgpu_sriov_is_pp_one_vf(adev))
5634                 WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5635         else
5636                 WREG32(mmRLC_SPM_VMID, data);
5637 }
5638
/* RLC callback table wiring the gfx v8 implementations into the common
 * amdgpu RLC layer.  NOTE(review): despite the "iceland" name this table
 * only references generic gfx_v8_0_* helpers - confirm at the assignment
 * site (outside this chunk) which ASICs actually use it.
 */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
        .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
        .set_safe_mode = gfx_v8_0_set_safe_mode,
        .unset_safe_mode = gfx_v8_0_unset_safe_mode,
        .init = gfx_v8_0_rlc_init,
        .get_csb_size = gfx_v8_0_get_csb_size,
        .get_csb_buffer = gfx_v8_0_get_csb_buffer,
        .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
        .resume = gfx_v8_0_rlc_resume,
        .stop = gfx_v8_0_rlc_stop,
        .reset = gfx_v8_0_rlc_reset,
        .start = gfx_v8_0_rlc_start,
        .update_spm_vmid = gfx_v8_0_update_spm_vmid
};
5653
/*
 * Enable/disable medium-grain clockgating (MGCG), medium-grain light
 * sleep (RLC/CP memory LS) and CGTS tree-shade gating.  The whole
 * sequence runs inside RLC safe mode; the numbered steps and the serdes
 * idle waits around the BPM commands are order-sensitive.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, data;

        amdgpu_gfx_rlc_enter_safe_mode(adev);

        /* It is disabled by HW by default */
        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
                                /* 1 - RLC memory Light sleep */
                                WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

                        /* 2 - CP memory Light sleep */
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
                                WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
                }

                /* 3 - RLC_CGTT_MGCG_OVERRIDE: clear overrides so MGCG can engage;
                 * APUs keep the GRBM override set, dGPUs clear it too
                 */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                if (adev->flags & AMD_IS_APU)
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
                else
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 5 - clear mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
                        /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
                        temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                        data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
                        data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
                        data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
                        data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
                        /* LS override cleared only when both MGLS and CGTS_LS are supported */
                        if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
                            (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
                        data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
                        data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
                        if (temp != data)
                                WREG32(mmCGTS_SM_CTRL_REG, data);
                }
                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        } else {
                /* disable path: re-assert every override, then turn off LS */
                /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 2 - disable MGLS in RLC */
                data = RREG32(mmRLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                        WREG32(mmRLC_MEM_SLP_CNTL, data);
                }

                /* 3 - disable MGLS in CP */
                data = RREG32(mmCP_MEM_SLP_CNTL);
                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                        WREG32(mmCP_MEM_SLP_CNTL, data);
                }

                /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
                temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
                                CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
                if (temp != data)
                        WREG32(mmCGTS_SM_CTRL_REG, data);

                /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 6 - set mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                udelay(50);

                /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        }

        amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5757
/*
 * Enable/disable coarse-grain clockgating (CGCG) and coarse-grain light
 * sleep (CGLS).  Runs inside RLC safe mode; GUI idle interrupts are
 * disabled while CGCG is being torn down and re-enabled afterwards.
 * Step order (override clear -> serdes idle -> BPM command -> enable bit)
 * is hardware-mandated.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, temp1, data, data1;

        temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

        amdgpu_gfx_rlc_enter_safe_mode(adev);

        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
                /* 1 - clear the CGCG override so the RLC may gate */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 2 - clear cgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 3 - write cmd to set CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

                /* 4 - enable cgcg */
                data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
                        /* enable cgls*/
                        data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

                        /* also clear the CGLS override */
                        temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                        data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

                        if (temp1 != data1)
                                WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
                } else {
                        data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
                }

                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);

                /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
                 * Cmp_busy/GFX_Idle interrupts
                 */
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
        } else {
                /* disable cntx_empty_int_enable & GFX Idle interrupt */
                gfx_v8_0_enable_gui_idle_interrupt(adev, false);

                /* TEST CGCG */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* read gfx register to wake up cgcg */
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to Set CGCG Overrride */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to Clear CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

                /* disable cgcg, cgls should be disabled too. */
                data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
                          RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
                /* enable interrupts again for PG */
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
        }

        gfx_v8_0_wait_for_rlc_serdes(adev);

        amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5850 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5851                                             bool enable)
5852 {
5853         if (enable) {
5854                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5855                  * ===  MGCG + MGLS + TS(CG/LS) ===
5856                  */
5857                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5858                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5859         } else {
5860                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5861                  * ===  CGCG + CGLS ===
5862                  */
5863                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5864                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5865         }
5866         return 0;
5867 }
5868
5869 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5870                                           enum amd_clockgating_state state)
5871 {
5872         uint32_t msg_id, pp_state = 0;
5873         uint32_t pp_support_state = 0;
5874
5875         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5876                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5877                         pp_support_state = PP_STATE_SUPPORT_LS;
5878                         pp_state = PP_STATE_LS;
5879                 }
5880                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5881                         pp_support_state |= PP_STATE_SUPPORT_CG;
5882                         pp_state |= PP_STATE_CG;
5883                 }
5884                 if (state == AMD_CG_STATE_UNGATE)
5885                         pp_state = 0;
5886
5887                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5888                                 PP_BLOCK_GFX_CG,
5889                                 pp_support_state,
5890                                 pp_state);
5891                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5892         }
5893
5894         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5895                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5896                         pp_support_state = PP_STATE_SUPPORT_LS;
5897                         pp_state = PP_STATE_LS;
5898                 }
5899
5900                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5901                         pp_support_state |= PP_STATE_SUPPORT_CG;
5902                         pp_state |= PP_STATE_CG;
5903                 }
5904
5905                 if (state == AMD_CG_STATE_UNGATE)
5906                         pp_state = 0;
5907
5908                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5909                                 PP_BLOCK_GFX_MG,
5910                                 pp_support_state,
5911                                 pp_state);
5912                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5913         }
5914
5915         return 0;
5916 }
5917
5918 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5919                                           enum amd_clockgating_state state)
5920 {
5921
5922         uint32_t msg_id, pp_state = 0;
5923         uint32_t pp_support_state = 0;
5924
5925         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5926                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5927                         pp_support_state = PP_STATE_SUPPORT_LS;
5928                         pp_state = PP_STATE_LS;
5929                 }
5930                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5931                         pp_support_state |= PP_STATE_SUPPORT_CG;
5932                         pp_state |= PP_STATE_CG;
5933                 }
5934                 if (state == AMD_CG_STATE_UNGATE)
5935                         pp_state = 0;
5936
5937                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5938                                 PP_BLOCK_GFX_CG,
5939                                 pp_support_state,
5940                                 pp_state);
5941                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5942         }
5943
5944         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5945                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5946                         pp_support_state = PP_STATE_SUPPORT_LS;
5947                         pp_state = PP_STATE_LS;
5948                 }
5949                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5950                         pp_support_state |= PP_STATE_SUPPORT_CG;
5951                         pp_state |= PP_STATE_CG;
5952                 }
5953                 if (state == AMD_CG_STATE_UNGATE)
5954                         pp_state = 0;
5955
5956                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5957                                 PP_BLOCK_GFX_3D,
5958                                 pp_support_state,
5959                                 pp_state);
5960                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5961         }
5962
5963         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5964                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5965                         pp_support_state = PP_STATE_SUPPORT_LS;
5966                         pp_state = PP_STATE_LS;
5967                 }
5968
5969                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5970                         pp_support_state |= PP_STATE_SUPPORT_CG;
5971                         pp_state |= PP_STATE_CG;
5972                 }
5973
5974                 if (state == AMD_CG_STATE_UNGATE)
5975                         pp_state = 0;
5976
5977                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5978                                 PP_BLOCK_GFX_MG,
5979                                 pp_support_state,
5980                                 pp_state);
5981                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5982         }
5983
5984         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5985                 pp_support_state = PP_STATE_SUPPORT_LS;
5986
5987                 if (state == AMD_CG_STATE_UNGATE)
5988                         pp_state = 0;
5989                 else
5990                         pp_state = PP_STATE_LS;
5991
5992                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5993                                 PP_BLOCK_GFX_RLC,
5994                                 pp_support_state,
5995                                 pp_state);
5996                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5997         }
5998
5999         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6000                 pp_support_state = PP_STATE_SUPPORT_LS;
6001
6002                 if (state == AMD_CG_STATE_UNGATE)
6003                         pp_state = 0;
6004                 else
6005                         pp_state = PP_STATE_LS;
6006                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6007                         PP_BLOCK_GFX_CP,
6008                         pp_support_state,
6009                         pp_state);
6010                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6011         }
6012
6013         return 0;
6014 }
6015
6016 static int gfx_v8_0_set_clockgating_state(void *handle,
6017                                           enum amd_clockgating_state state)
6018 {
6019         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6020
6021         if (amdgpu_sriov_vf(adev))
6022                 return 0;
6023
6024         switch (adev->asic_type) {
6025         case CHIP_FIJI:
6026         case CHIP_CARRIZO:
6027         case CHIP_STONEY:
6028                 gfx_v8_0_update_gfx_clock_gating(adev,
6029                                                  state == AMD_CG_STATE_GATE);
6030                 break;
6031         case CHIP_TONGA:
6032                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6033                 break;
6034         case CHIP_POLARIS10:
6035         case CHIP_POLARIS11:
6036         case CHIP_POLARIS12:
6037         case CHIP_VEGAM:
6038                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6039                 break;
6040         default:
6041                 break;
6042         }
6043         return 0;
6044 }
6045
6046 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6047 {
6048         return ring->adev->wb.wb[ring->rptr_offs];
6049 }
6050
6051 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6052 {
6053         struct amdgpu_device *adev = ring->adev;
6054
6055         if (ring->use_doorbell)
6056                 /* XXX check if swapping is necessary on BE */
6057                 return ring->adev->wb.wb[ring->wptr_offs];
6058         else
6059                 return RREG32(mmCP_RB0_WPTR);
6060 }
6061
6062 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6063 {
6064         struct amdgpu_device *adev = ring->adev;
6065
6066         if (ring->use_doorbell) {
6067                 /* XXX check if swapping is necessary on BE */
6068                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6069                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6070         } else {
6071                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6072                 (void)RREG32(mmCP_RB0_WPTR);
6073         }
6074 }
6075
/*
 * Emit a PM4 WAIT_REG_MEM that triggers an HDP flush and waits for its
 * completion bit.  Compute/KIQ rings use a per-ME/pipe done bit; gfx
 * rings use the CP0 bit and wait on the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
        u32 ref_and_mask, reg_mem_engine;

        if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
            (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
                /* pick the done bit matching this ring's ME and pipe */
                switch (ring->me) {
                case 1:
                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
                        break;
                case 2:
                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
                        break;
                default:
                        return;
                }
                reg_mem_engine = 0;
        } else {
                ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
                reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
        }

        /* write ref to FLUSH_REQ, then poll FLUSH_DONE until (val & mask) == ref */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
                                 reg_mem_engine));
        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
        amdgpu_ring_write(ring, ref_and_mask);
        amdgpu_ring_write(ring, ref_and_mask);
        amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6108
/*
 * Emit the two EVENT_WRITE packets that flush the VGT: a VS partial
 * flush first, then the VGT_FLUSH event itself.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
                EVENT_INDEX(4));

        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
                EVENT_INDEX(0));
}
6119
/*
 * Emit an INDIRECT_BUFFER packet launching @ib on a gfx ring.
 * CE IBs use INDIRECT_BUFFER_CONST; the control word carries the IB
 * length and the VMID in bits 31:24.  For preemptible SR-IOV IBs the
 * preemption-enable bit is set and DE metadata is emitted first.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
                                        struct amdgpu_job *job,
                                        struct amdgpu_ib *ib,
                                        uint32_t flags)
{
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
        u32 header, control = 0;

        if (ib->flags & AMDGPU_IB_FLAG_CE)
                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        else
                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

        control |= ib->length_dw | (vmid << 24);

        if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
                control |= INDIRECT_BUFFER_PRE_ENB(1);

                /* DE metadata only for the DE IB of a VM submission */
                if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
                        gfx_v8_0_ring_emit_de_meta(ring);
        }

        amdgpu_ring_write(ring, header);
        amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) |    /* byte-swap the IB fetch on BE hosts */
#endif
                          (ib->gpu_addr & 0xFFFFFFFC));  /* addr must be dword-aligned */
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
}
6151
/*
 * Emit an INDIRECT_BUFFER packet launching @ib on a compute ring,
 * optionally preceded by the GDS wave-ID reset workaround.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
                                          struct amdgpu_job *job,
                                          struct amdgpu_ib *ib,
                                          uint32_t flags)
{
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

        /* Currently, there is a high possibility to get wave ID mismatch
         * between ME and GDS, leading to a hw deadlock, because ME generates
         * different wave IDs than the GDS expects. This situation happens
         * randomly when at least 5 compute pipes use GDS ordered append.
         * The wave IDs generated by ME are also wrong after suspend/resume.
         * Those are probably bugs somewhere else in the kernel driver.
         *
         * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
         * GDS to 0 for this ring (me/pipe).
         */
        if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
                amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
                amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
                amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
                                (2 << 0) |      /* byte-swap the IB fetch on BE hosts */
#endif
                                (ib->gpu_addr & 0xFFFFFFFC));  /* dword-aligned IB address */
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
}
6185
/*
 * Emit a fence on a gfx ring: write @seq to @addr via EVENT_WRITE_EOP
 * once caches are flushed, optionally raising an interrupt.
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit data write,
 *         AMDGPU_FENCE_FLAG_INT requests an interrupt on completion.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* Workaround for cache flush problems. First send a dummy EOP
         * event down the pipe with seq one below.
         */
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        /* dummy write: 32-bit data, no interrupt */
        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
                                DATA_SEL(1) | INT_SEL(0));
        amdgpu_ring_write(ring, lower_32_bits(seq - 1));
        amdgpu_ring_write(ring, upper_32_bits(seq - 1));

        /* Then send the real EOP event down the pipe:
         * EVENT_WRITE_EOP - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
                          DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));

}
6222
/*
 * gfx_v8_0_ring_emit_pipeline_sync - stall the ring until fences are signaled
 *
 * Emits a WAIT_REG_MEM polling the ring's fence memory location until it
 * equals the last synced sequence number.  The GFX ring waits on the PFP
 * engine (so prefetch stalls too); other ring types wait on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask: full dword */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6239
/*
 * gfx_v8_0_ring_emit_vm_flush - emit a GPU TLB flush for a VM context
 *
 * @ring: ring to emit on
 * @vmid: VM context id whose translations are invalidated
 * @pd_addr: new page directory base address
 *
 * Emits the common GMC TLB-flush sequence, then a WAIT_REG_MEM read of
 * VM_INVALIDATE_REQUEST (function "always", so it never blocks on the
 * value) to make sure the invalidate request has been consumed before
 * subsequent packets run.  On GFX the PFP is then re-synced to the ME,
 * because the PFP may hold prefetched data translated through stale
 * page tables.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6265
6266 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6267 {
6268         return ring->adev->wb.wb[ring->wptr_offs];
6269 }
6270
6271 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6272 {
6273         struct amdgpu_device *adev = ring->adev;
6274
6275         /* XXX check if swapping is necessary on BE */
6276         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6277         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6278 }
6279
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence write on a compute ring
 *
 * @ring: compute ring to emit on
 * @addr: GPU address the fence sequence number is written to
 * @seq: sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit data write,
 *         AMDGPU_FENCE_FLAG_INT raises an interrupt on completion
 *
 * Compute uses a single RELEASE_MEM packet (instead of the GFX ring's
 * EVENT_WRITE_EOP pair) to flush caches, write the fence value and
 * optionally raise an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6300
/*
 * gfx_v8_0_ring_emit_fence_kiq - emit a fence write on the KIQ ring
 *
 * @ring: KIQ ring to emit on
 * @addr: GPU address the (32-bit) fence sequence number is written to
 * @seq: sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_INT requests an interrupt; 64-bit fences are
 *         not supported on the KIQ
 *
 * Writes the fence value with WRITE_DATA, then, if an interrupt was
 * requested, writes CPC_INT_STATUS to trigger one from the CPC.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6325
/* Emit a SWITCH_BUFFER packet (used for double-buffered CE/DE work). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6331
/*
 * gfx_v8_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet on the GFX ring
 *
 * @ring: GFX ring to emit on
 * @flags: AMDGPU_HAVE_CTX_SWITCH / AMDGPU_PREAMBLE_IB_PRESENT* bits
 *
 * Builds the CONTEXT_CONTROL load-control dword (dw2) from the submission
 * flags.  Under SR-IOV the CE metadata is emitted first.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6364
/*
 * gfx_v8_0_ring_emit_init_cond_exec - start a conditional-execution region
 *
 * Emits a COND_EXEC packet whose dword count is a placeholder
 * (0x55aa55aa) to be filled in later by
 * gfx_v8_0_ring_emit_patch_cond_exec().
 *
 * Returns the ring offset of the placeholder dword.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6377
/*
 * gfx_v8_0_ring_emit_patch_cond_exec - finalize a conditional-execution region
 *
 * @ring: ring the COND_EXEC was emitted on
 * @offset: placeholder offset returned by gfx_v8_0_ring_emit_init_cond_exec()
 *
 * Replaces the 0x55aa55aa placeholder with the number of dwords emitted
 * since it, accounting for ring-buffer wrap-around when the current
 * write pointer has wrapped past the placeholder.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	/* cur = last dword written so far (wptr points one past it) */
	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped around the ring since the placeholder */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6391
/*
 * gfx_v8_0_ring_emit_rreg - emit a packet that reads an MMIO register
 *
 * @ring: ring to emit on
 * @reg: register offset to read
 * @reg_val_offs: write-back slot index where the value is stored
 *
 * Uses COPY_DATA (register -> memory, write-confirm) so the caller can
 * later pick the register value up from adev->wb.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t reg_val_offs)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	/* destination: GPU address of the write-back slot */
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
}
6408
6409 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6410                                   uint32_t val)
6411 {
6412         uint32_t cmd;
6413
6414         switch (ring->funcs->type) {
6415         case AMDGPU_RING_TYPE_GFX:
6416                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6417                 break;
6418         case AMDGPU_RING_TYPE_KIQ:
6419                 cmd = 1 << 16; /* no inc addr */
6420                 break;
6421         default:
6422                 cmd = WR_CONFIRM;
6423                 break;
6424         }
6425
6426         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6427         amdgpu_ring_write(ring, cmd);
6428         amdgpu_ring_write(ring, reg);
6429         amdgpu_ring_write(ring, 0);
6430         amdgpu_ring_write(ring, val);
6431 }
6432
/*
 * gfx_v8_0_ring_soft_recovery - try to recover a hung ring without a reset
 *
 * @ring: the hung ring
 * @vmid: VM id of the job believed to be hanging the ring
 *
 * Writes SQ_CMD with CHECK_VMID set so only waves belonging to @vmid are
 * targeted.  NOTE(review): CMD=0x03/MODE=0x01 presumably request a wave
 * kill (as on other GFX generations) — confirm against the VI SQ_CMD
 * register spec.
 */
static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}
6444
/* Enable/disable the end-of-pipe timestamp interrupt on the GFX ring. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6451
/*
 * gfx_v8_0_set_compute_eop_interrupt_state - toggle compute EOP interrupts
 *
 * @adev: amdgpu device
 * @me: micro-engine index (only MEC1, i.e. me == 1, is handled here)
 * @pipe: pipe index within the MEC (0-3)
 * @state: enable or disable
 *
 * Sets/clears TIME_STAMP_INT_ENABLE in the per-pipe CP_ME1_PIPEn_INT_CNTL
 * register.  Invalid me/pipe combinations are logged and ignored.
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
6502
/* Enable/disable the privileged-register-access fault interrupt. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6513
/* Enable/disable the privileged-instruction fault interrupt. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6524
/*
 * gfx_v8_0_set_eop_interrupt_state - dispatch an EOP interrupt state change
 *
 * Routes the request to the GFX or the appropriate compute (me, pipe)
 * handler based on the interrupt source @type.  Unknown types are
 * silently ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6563
/*
 * gfx_v8_0_set_cp_ecc_int_state - toggle CP ECC error interrupts
 *
 * Sets/clears CP_ECC_ERROR_INT_ENABLE in every CP, CPC and per-pipe MEC
 * interrupt control register so the ECC interrupt state is consistent
 * across all engines.
 *
 * Returns 0 on success, -EINVAL for an unknown @state.
 */
static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned int type,
					 enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 0;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 1;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);

	return 0;
}
6608
/*
 * gfx_v8_0_set_sq_int_state - toggle SQ interrupt message delivery
 *
 * Note the deliberate inversion: the hardware field is STALL, so
 * DISABLE maps to 1 (stall/suppress SQ interrupt messages) and ENABLE
 * maps to 0 (let them through).
 *
 * Returns 0 on success, -EINVAL for an unknown @state.
 */
static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
		     enable_flag);

	return 0;
}
6634
/*
 * gfx_v8_0_eop_irq - end-of-pipe interrupt handler
 *
 * Decodes the IH entry's ring_id (bits [3:2] = me, [1:0] = pipe,
 * [6:4] = queue) and forwards fence processing to the matching ring:
 * me 0 is the GFX ring, me 1/2 are the compute rings.
 *
 * Always returns 0 (handled).
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
			  */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6666
/*
 * gfx_v8_0_fault - report a CP fault to the scheduler of the faulting ring
 *
 * Decodes the IH entry's ring_id the same way as gfx_v8_0_eop_irq()
 * (bits [3:2] = me, [1:0] = pipe, [6:4] = queue) and signals
 * drm_sched_fault() on the matching GFX or compute ring so the
 * scheduler can start job timeout/recovery handling.
 */
static void gfx_v8_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}
6693
/* Privileged-register-access fault: log it and fault the offending ring. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
6702
/* Privileged-instruction fault: log it and fault the offending ring. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
6711
/*
 * gfx_v8_0_cp_ecc_error_irq - CP EDC/ECC error interrupt handler
 *
 * Nothing can be recovered here; just report the error.
 * Always returns 0 (handled).
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* terminate with '\n' so the message is flushed as its own log line */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6719
6720 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6721 {
6722         u32 enc, se_id, sh_id, cu_id;
6723         char type[20];
6724         int sq_edc_source = -1;
6725
6726         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6727         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6728
6729         switch (enc) {
6730                 case 0:
6731                         DRM_INFO("SQ general purpose intr detected:"
6732                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6733                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6734                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6735                                         "wlt %d, thread_trace %d.\n",
6736                                         se_id,
6737                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6738                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6739                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6740                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6741                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6742                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6743                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6744                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6745                                         );
6746                         break;
6747                 case 1:
6748                 case 2:
6749
6750                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6751                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6752
6753                         /*
6754                          * This function can be called either directly from ISR
6755                          * or from BH in which case we can access SQ_EDC_INFO
6756                          * instance
6757                          */
6758                         if (in_task()) {
6759                                 mutex_lock(&adev->grbm_idx_mutex);
6760                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6761
6762                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6763
6764                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6765                                 mutex_unlock(&adev->grbm_idx_mutex);
6766                         }
6767
6768                         if (enc == 1)
6769                                 sprintf(type, "instruction intr");
6770                         else
6771                                 sprintf(type, "EDC/ECC error");
6772
6773                         DRM_INFO(
6774                                 "SQ %s detected: "
6775                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6776                                         "trap %s, sq_ed_info.source %s.\n",
6777                                         type, se_id, sh_id, cu_id,
6778                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6779                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6780                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6781                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6782                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6783                                 );
6784                         break;
6785                 default:
6786                         DRM_ERROR("SQ invalid encoding type\n.");
6787         }
6788 }
6789
/*
 * gfx_v8_0_sq_irq_work_func - bottom half for SQ interrupts
 *
 * Recovers the device and saved ih_data from the embedded sq_work and
 * re-runs the parser from task context, where SQ_EDC_INFO is accessible.
 */
static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{

	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
	struct sq_work *sq_work = container_of(work, struct sq_work, work);

	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
}
6798
/*
 * gfx_v8_0_sq_irq - SQ interrupt handler (top half)
 *
 * Defers parsing to the sq_work bottom half when possible so
 * SQ_EDC_INFO can be read; if a previous work item is still pending,
 * parses directly in the ISR with whatever information is available.
 *
 * Always returns 0 (handled).
 */
static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
			   struct amdgpu_irq_src *source,
			   struct amdgpu_iv_entry *entry)
{
	unsigned ih_data = entry->src_data[0];

	/*
	 * Try to submit work so SQ_EDC_INFO can be accessed from
	 * BH. If previous work submission hasn't finished yet
	 * just print whatever info is possible directly from the ISR.
	 */
	if (work_pending(&adev->gfx.sq_work.work)) {
		gfx_v8_0_parse_sq_irq(adev, ih_data);
	} else {
		adev->gfx.sq_work.ih_data = ih_data;
		schedule_work(&adev->gfx.sq_work.work);
	}

	return 0;
}
6819
/*
 * gfx_v8_0_emit_mem_sync - emit a full-range cache flush on the GFX ring
 *
 * Emits a SURFACE_SYNC invalidating/writing back TCL1, TC, and the
 * shader K$/I$ over the entire address range (CP_COHER_SIZE = all).
 */
static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA |
			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
}
6832
/*
 * gfx_v8_0_emit_mem_sync_compute - emit a full-range cache flush on compute
 *
 * Same cache actions as gfx_v8_0_emit_mem_sync(), but compute rings use
 * the ACQUIRE_MEM packet, which carries 64-bit size/base fields.
 */
static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA |
			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xff);		/* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
}
6847
/* IP-block callbacks for GFX v8: lifecycle, reset, and gating control. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6868
/*
 * Ring function table for the GFX (graphics) ring.  emit_frame_size is
 * the worst-case dword budget per frame, itemized in the comments below.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		12 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		12 + 12 + /* FENCE x2 */
		2 + /* SWITCH_BUFFER */
		5, /* SURFACE_SYNC */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
};
6915
/*
 * Ring function table for the compute (MEC) rings.  Shares most emitters
 * with the GFX ring but uses compute-specific wptr handling, IB emission
 * and fence packets; has no context-control/cond-exec hooks since those
 * are GFX-ring-only concepts.  emit_frame_size is the worst-case dword
 * reservation, itemized per emitter below.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
		7, /* gfx_v8_0_emit_mem_sync_compute */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
};
6946
/*
 * Ring function table for the KIQ (Kernel Interface Queue) ring.  The KIQ
 * only carries driver-internal packets (register read/write, queue map/
 * unmap), so no .emit_ib / .test_ib hooks are installed — only the fence,
 * rreg/wreg and basic ring plumbing.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6970
6971 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6972 {
6973         int i;
6974
6975         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6976
6977         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6978                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6979
6980         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6981                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6982 }
6983
/* End-of-pipe (ring fence) interrupt: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

/* Privileged register access fault interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

/* Privileged instruction fault interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

/* CP ECC error interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

/* SQ (shader sequencer) interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};
7008
/*
 * Hook up the GFX-block interrupt sources to their handler tables and
 * declare how many distinct interrupt types each source exposes.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* one EOP interrupt type per CP ring (gfx + all compute pipes) */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
7026
/*
 * Install the RLC function table.  NOTE(review): the iceland table appears
 * to be shared by all gfx v8 variants handled in this file — confirm
 * against the iceland_rlc_funcs definition earlier in the file.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7031
/*
 * Initialize the ASIC GDS (Global Data Share) info: total GDS size is read
 * back from hardware; GWS/OA counts are fixed for this generation.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}
7040
7041 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7042                                                  u32 bitmap)
7043 {
7044         u32 data;
7045
7046         if (!bitmap)
7047                 return;
7048
7049         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7050         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7051
7052         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7053 }
7054
7055 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7056 {
7057         u32 data, mask;
7058
7059         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7060                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7061
7062         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7063
7064         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7065 }
7066
7067 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7068 {
7069         int i, j, k, counter, active_cu_number = 0;
7070         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7071         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7072         unsigned disable_masks[4 * 2];
7073         u32 ao_cu_num;
7074
7075         memset(cu_info, 0, sizeof(*cu_info));
7076
7077         if (adev->flags & AMD_IS_APU)
7078                 ao_cu_num = 2;
7079         else
7080                 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7081
7082         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7083
7084         mutex_lock(&adev->grbm_idx_mutex);
7085         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7086                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7087                         mask = 1;
7088                         ao_bitmap = 0;
7089                         counter = 0;
7090                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7091                         if (i < 4 && j < 2)
7092                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7093                                         adev, disable_masks[i * 2 + j]);
7094                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7095                         cu_info->bitmap[i][j] = bitmap;
7096
7097                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7098                                 if (bitmap & mask) {
7099                                         if (counter < ao_cu_num)
7100                                                 ao_bitmap |= mask;
7101                                         counter ++;
7102                                 }
7103                                 mask <<= 1;
7104                         }
7105                         active_cu_number += counter;
7106                         if (i < 2 && j < 2)
7107                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7108                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7109                 }
7110         }
7111         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7112         mutex_unlock(&adev->grbm_idx_mutex);
7113
7114         cu_info->number = active_cu_number;
7115         cu_info->ao_cu_mask = ao_cu_mask;
7116         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7117         cu_info->max_waves_per_simd = 10;
7118         cu_info->max_scratch_slots_per_cu = 32;
7119         cu_info->wave_front_size = 64;
7120         cu_info->lds_size = 64;
7121 }
7122
/* IP block descriptor for GFX v8.0, registered by the SoC setup code. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7131
/* IP block descriptor for GFX v8.1; shares the v8.0 function table. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7140
7141 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7142 {
7143         uint64_t ce_payload_addr;
7144         int cnt_ce;
7145         union {
7146                 struct vi_ce_ib_state regular;
7147                 struct vi_ce_ib_state_chained_ib chained;
7148         } ce_payload = {};
7149
7150         if (ring->adev->virt.chained_ib_support) {
7151                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7152                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7153                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7154         } else {
7155                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7156                         offsetof(struct vi_gfx_meta_data, ce_payload);
7157                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7158         }
7159
7160         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7161         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7162                                 WRITE_DATA_DST_SEL(8) |
7163                                 WR_CONFIRM) |
7164                                 WRITE_DATA_CACHE_POLICY(0));
7165         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7166         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7167         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7168 }
7169
/*
 * Emit DE metadata into the gfx ring: a vi_de_ib_state payload carrying
 * the GDS backup address, written via a DE-engine WRITE_DATA packet into
 * the DE slot of the CSA.  Payload layout (regular vs chained-IB) follows
 * the device's chained-IB virtualization capability.
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	/* GDS backup area sits 4096 bytes past the CSA base */
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}
This page took 0.469782 seconds and 4 git commands to generate.