]> Git Repo - linux.git/blob - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
Merge remote-tracking branch 'linuxtv/vsp1' into HEAD
[linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
31
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
34
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
40
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
45
46 #include "dce/dce_10_0_d.h"
47 #include "dce/dce_10_0_sh_mask.h"
48
/* Ring counts for GFX8 (1 gfx, 8 compute); presumably consumed by ring setup code outside this excerpt. */
49 #define GFX8_NUM_GFX_RINGS     1
50 #define GFX8_NUM_COMPUTE_RINGS 8
51
/*
 * Per-ASIC GB_ADDR_CONFIG golden values. These equal the mmGB_ADDR_CONFIG
 * entries in iceland_golden_common_all / cz_golden_common_all (0x22010001)
 * and tonga_golden_common_all (0x22011003) further down in this file.
 */
52 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
53 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
54 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
55
/*
 * Field helpers: shift a raw value (x) into the bit position of the named
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 field using the register's __SHIFT
 * constant. No masking is applied, so (x) must already fit the field.
 */
56 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
57 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
58 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
59 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
60 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
61 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
62 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
63 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
64 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
65
/*
 * Single-bit masks (bits 0..5) named after RLC_CGTT_MGCG_OVERRIDE fields.
 * NOTE(review): defined locally here, presumably because the register
 * headers do not provide them - confirm.
 */
66 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
67 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
68 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
69 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
70 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
71 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
72
73 /* BPM SERDES CMD values; "CLE" presumably abbreviates "clear" - usage is outside this excerpt */
74 #define SET_BPM_SERDES_CMD    1
75 #define CLE_BPM_SERDES_CMD    0
76
77 /* BPM Register Address */
78 enum {
79         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
80         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
81         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
82         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
83         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
84         BPM_REG_FGCG_MAX            /* One past the last address above (value 5) */
85 };
86
/*
 * Firmware images declared for this module, grouped per ASIC
 * (carrizo, stoney, tonga, topaz, fiji). Each group lists ce, pfp, me, mec
 * and rlc images; only carrizo, tonga and fiji also list a mec2 image.
 */
87 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
88 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
90 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
91 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
93
94 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
95 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
97 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
98 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
99
100 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
101 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
103 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
104 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
108 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
110 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
111 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
114 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
116 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
117 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
119
/*
 * GDS register offsets, one entry per VMID 0..15. Each entry lists that
 * VMID's GDS base, GDS size, GWS and OA register offsets, in that order.
 * The struct's field names are declared outside this excerpt.
 */
120 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
121 {
122         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
123         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
124         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
125         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
126         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
127         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
128         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
129         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
130         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
131         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
132         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
133         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
134         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
135         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
136         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
137         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
138 };
139
/*
 * Tonga golden settings; programmed second (after tonga_mgcg_cgcg_init) by
 * gfx_v8_0_init_golden_registers() for CHIP_TONGA.
 * Flat u32 list, three words per register starting with the mm* offset;
 * presumably (reg, and_mask, or_value) as interpreted by
 * amdgpu_program_register_sequence() - confirm against that helper.
 */
140 static const u32 golden_settings_tonga_a11[] =
141 {
142         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
143         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
144         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
145         mmGB_GPU_ID, 0x0000000f, 0x00000000,
146         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
147         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
148         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
149         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
150         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
151         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
152         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
153         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
154         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
155         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
156         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
157 };
158
/*
 * Tonga common golden values; programmed last of the three Tonga sequences
 * by gfx_v8_0_init_golden_registers(). Three u32 words per register;
 * every entry here uses 0xffffffff as its middle word.
 */
159 static const u32 tonga_golden_common_all[] =
160 {
161         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
162         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
163         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
164         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
165         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
166         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
167         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
168         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
169 };
170
/*
 * Tonga MGCG/CGCG init sequence; programmed first for CHIP_TONGA by
 * gfx_v8_0_init_golden_registers(). Three u32 words per register.
 * Covers the CGTT_* clock-control registers and the per-CU CGTS_CU0..CU7
 * control registers. NOTE(review): entry order is presumably significant
 * (mmGRBM_GFX_INDEX is written before each group) - do not reorder.
 */
171 static const u32 tonga_mgcg_cgcg_init[] =
172 {
173         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
174         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
175         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
176         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
177         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
178         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
179         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
180         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
181         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
182         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
183         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
184         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
185         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
186         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
187         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
188         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
189         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
190         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
191         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
192         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
193         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
194         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
195         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
196         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
198         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
199         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
200         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
201         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
202         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
203         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
204         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
205         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
206         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
207         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
208         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
209         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
210         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
211         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
212         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
213         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
214         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
215         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
216         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
217         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
218         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
219         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
220         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
221         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
222         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
223         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
224         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
225         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
226         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
227         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
228         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
229         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
230         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
231         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
232         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
233         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
234         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
235         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
236         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
237         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
238         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
239         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
240         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
241         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
242         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
243         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
244         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
245         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
246         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
247         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
248 };
249
/*
 * Fiji common golden values; programmed last of the three Fiji sequences by
 * gfx_v8_0_init_golden_registers(). Three u32 words per register.
 * mmGRBM_GFX_INDEX is written again before the final mmSPI_CONFIG_CNTL_1 entry.
 */
250 static const u32 fiji_golden_common_all[] =
251 {
252         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
253         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
254         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
255         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
256         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
257         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
258         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
259         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
260         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
261         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
262 };
263
/*
 * Fiji golden settings; programmed second (after fiji_mgcg_cgcg_init) by
 * gfx_v8_0_init_golden_registers() for CHIP_FIJI.
 * Three u32 words per register; presumably (reg, and_mask, or_value) -
 * confirm against amdgpu_program_register_sequence().
 */
264 static const u32 golden_settings_fiji_a10[] =
265 {
266         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
267         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
268         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
269         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
270         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
271         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
272         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
273         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
274         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
275         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
276         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
277 };
278
/*
 * Fiji MGCG/CGCG init sequence; programmed first for CHIP_FIJI by
 * gfx_v8_0_init_golden_registers(). Three u32 words per register.
 * Unlike the Tonga/Iceland/Carrizo tables, this one has no per-CU
 * CGTS_CUx_* entries.
 */
279 static const u32 fiji_mgcg_cgcg_init[] =
280 {
281         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
282         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
283         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
284         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
285         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
286         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
287         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
288         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
289         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
290         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
291         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
292         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
293         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
294         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
295         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
296         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
297         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
298         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
299         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
300         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
301         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
302         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
303         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
304         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
305         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
306         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
307         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
308         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
309         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
310         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
311         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
312         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
313         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
314         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
315         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
316 };
317
/*
 * Iceland golden settings; programmed second (after iceland_mgcg_cgcg_init)
 * by gfx_v8_0_init_golden_registers() for CHIP_TOPAZ.
 * Three u32 words per register; presumably (reg, and_mask, or_value) -
 * confirm against amdgpu_program_register_sequence().
 */
318 static const u32 golden_settings_iceland_a11[] =
319 {
320         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
321         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
322         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
323         mmGB_GPU_ID, 0x0000000f, 0x00000000,
324         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
325         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
326         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
327         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
328         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
329         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
330         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
331         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
332         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
333         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
334         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
335 };
336
/*
 * Iceland common golden values; programmed last of the three sequences used
 * for CHIP_TOPAZ by gfx_v8_0_init_golden_registers().
 * Three u32 words per register; every middle word here is 0xffffffff.
 */
337 static const u32 iceland_golden_common_all[] =
338 {
339         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
340         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
341         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
342         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
343         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
344         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
345         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
346         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
347 };
348
/*
 * Iceland MGCG/CGCG init sequence; programmed first for CHIP_TOPAZ by
 * gfx_v8_0_init_golden_registers(). Three u32 words per register.
 * Per-CU CGTS entries cover CU0..CU5 only, and the list has no
 * mmCP_MEM_SLP_CNTL entry (the Tonga/Fiji/Carrizo tables end with one).
 */
349 static const u32 iceland_mgcg_cgcg_init[] =
350 {
351         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
352         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
353         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
354         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
355         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
356         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
357         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
358         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
359         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
360         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
361         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
362         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
363         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
364         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
365         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
366         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
367         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
368         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
369         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
370         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
371         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
372         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
373         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
374         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
375         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
376         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
377         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
378         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
379         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
380         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
381         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
382         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
383         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
384         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
385         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
386         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
387         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
388         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
389         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
390         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
391         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
392         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
393         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
394         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
395         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
396         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
397         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
398         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
399         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
400         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
401         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
402         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
403         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
404         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
405         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
406         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
407         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
408         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
409         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
410         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
411         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
412         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
413         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
414         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
415 };
416
/*
 * Carrizo golden settings; programmed second (after cz_mgcg_cgcg_init) by
 * gfx_v8_0_init_golden_registers() for CHIP_CARRIZO.
 * Three u32 words per register; presumably (reg, and_mask, or_value) -
 * confirm against amdgpu_program_register_sequence().
 * NOTE(review): the mmTCP_ADDR_CONFIG value (0xf3) has bits set outside its
 * middle word (0xf); check how the helper treats such bits before editing.
 */
417 static const u32 cz_golden_settings_a11[] =
418 {
419         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
420         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
421         mmGB_GPU_ID, 0x0000000f, 0x00000000,
422         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
423         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
424         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
425         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
426         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
428         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
429 };
430
/*
 * Carrizo common golden values; programmed last of the three Carrizo
 * sequences by gfx_v8_0_init_golden_registers().
 * Three u32 words per register; every middle word here is 0xffffffff.
 */
431 static const u32 cz_golden_common_all[] =
432 {
433         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
434         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
435         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
436         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
437         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
438         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
439         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
440         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
441 };
442
/*
 * Carrizo MGCG/CGCG init sequence; programmed first for CHIP_CARRIZO by
 * gfx_v8_0_init_golden_registers(). Three u32 words per register.
 * Covers the CGTT_* clock-control registers and per-CU CGTS_CU0..CU7
 * control registers; mmRLC_CGCG_CGLS_CTRL is 0x0020003f here
 * (the Tonga/Fiji/Iceland tables use 0x0020003c).
 */
443 static const u32 cz_mgcg_cgcg_init[] =
444 {
445         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
446         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
447         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
452         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
454         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
455         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
456         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
458         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
459         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
460         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
461         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
462         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
463         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
464         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
465         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
466         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
467         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
468         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
469         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
470         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
471         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
472         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
473         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
474         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
475         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
476         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
479         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
484         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
487         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
488         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
489         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
490         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
491         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
492         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
493         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
494         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
495         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
496         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
497         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
498         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
499         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
500         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
501         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
502         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
503         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
504         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
505         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
506         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
507         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
508         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
509         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
510         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
511         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
512         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
513         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
514         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
515         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
516         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
517         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
518         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
519         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
520 };
521
/*
 * Stoney golden settings; programmed second (after stoney_mgcg_cgcg_init) by
 * gfx_v8_0_init_golden_registers() for CHIP_STONEY.
 * Three u32 words per register; presumably (reg, and_mask, or_value) -
 * confirm against amdgpu_program_register_sequence().
 */
522 static const u32 stoney_golden_settings_a11[] =
523 {
524         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
525         mmGB_GPU_ID, 0x0000000f, 0x00000000,
526         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
527         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
528         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
529         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
530         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
531         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
532         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
533         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
534 };
535
/*
 * Stoney common golden values; programmed last of the three Stoney sequences
 * by gfx_v8_0_init_golden_registers(). Three u32 words per register.
 * mmGB_ADDR_CONFIG is 0x12010001 here, unlike the 0x2201xxxx values used by
 * the other ASICs' common tables in this file.
 */
536 static const u32 stoney_golden_common_all[] =
537 {
538         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
539         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
540         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
541         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
542         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
543         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
544         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
545         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
546 };
547
/*
 * Stoney MGCG/CGCG init sequence; programmed first for CHIP_STONEY by
 * gfx_v8_0_init_golden_registers(). Three u32 words per register.
 * Much shorter than the other ASICs' init tables: it has no CGTT_* or per-CU
 * CGTS_CUx_* entries, and no mmRLC_CGTT_MGCG_OVERRIDE entry.
 */
548 static const u32 stoney_mgcg_cgcg_init[] =
549 {
550         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
551         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
552         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
553         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
554         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
555         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
556 };
557
/* Forward declarations of static setup helpers; their definitions are outside this excerpt. */
558 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
559 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
560 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
561
562 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
563 {
564         switch (adev->asic_type) {
565         case CHIP_TOPAZ:
566                 amdgpu_program_register_sequence(adev,
567                                                  iceland_mgcg_cgcg_init,
568                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
569                 amdgpu_program_register_sequence(adev,
570                                                  golden_settings_iceland_a11,
571                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
572                 amdgpu_program_register_sequence(adev,
573                                                  iceland_golden_common_all,
574                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
575                 break;
576         case CHIP_FIJI:
577                 amdgpu_program_register_sequence(adev,
578                                                  fiji_mgcg_cgcg_init,
579                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
580                 amdgpu_program_register_sequence(adev,
581                                                  golden_settings_fiji_a10,
582                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
583                 amdgpu_program_register_sequence(adev,
584                                                  fiji_golden_common_all,
585                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
586                 break;
587
588         case CHIP_TONGA:
589                 amdgpu_program_register_sequence(adev,
590                                                  tonga_mgcg_cgcg_init,
591                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
592                 amdgpu_program_register_sequence(adev,
593                                                  golden_settings_tonga_a11,
594                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
595                 amdgpu_program_register_sequence(adev,
596                                                  tonga_golden_common_all,
597                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
598                 break;
599         case CHIP_CARRIZO:
600                 amdgpu_program_register_sequence(adev,
601                                                  cz_mgcg_cgcg_init,
602                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
603                 amdgpu_program_register_sequence(adev,
604                                                  cz_golden_settings_a11,
605                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
606                 amdgpu_program_register_sequence(adev,
607                                                  cz_golden_common_all,
608                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
609                 break;
610         case CHIP_STONEY:
611                 amdgpu_program_register_sequence(adev,
612                                                  stoney_mgcg_cgcg_init,
613                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
614                 amdgpu_program_register_sequence(adev,
615                                                  stoney_golden_settings_a11,
616                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
617                 amdgpu_program_register_sequence(adev,
618                                                  stoney_golden_common_all,
619                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
620                 break;
621         default:
622                 break;
623         }
624 }
625
626 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
627 {
628         int i;
629
630         adev->gfx.scratch.num_reg = 7;
631         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
632         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
633                 adev->gfx.scratch.free[i] = true;
634                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
635         }
636 }
637
638 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
639 {
640         struct amdgpu_device *adev = ring->adev;
641         uint32_t scratch;
642         uint32_t tmp = 0;
643         unsigned i;
644         int r;
645
646         r = amdgpu_gfx_scratch_get(adev, &scratch);
647         if (r) {
648                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
649                 return r;
650         }
651         WREG32(scratch, 0xCAFEDEAD);
652         r = amdgpu_ring_alloc(ring, 3);
653         if (r) {
654                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
655                           ring->idx, r);
656                 amdgpu_gfx_scratch_free(adev, scratch);
657                 return r;
658         }
659         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
660         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
661         amdgpu_ring_write(ring, 0xDEADBEEF);
662         amdgpu_ring_commit(ring);
663
664         for (i = 0; i < adev->usec_timeout; i++) {
665                 tmp = RREG32(scratch);
666                 if (tmp == 0xDEADBEEF)
667                         break;
668                 DRM_UDELAY(1);
669         }
670         if (i < adev->usec_timeout) {
671                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
672                          ring->idx, i);
673         } else {
674                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
675                           ring->idx, scratch, tmp);
676                 r = -EINVAL;
677         }
678         amdgpu_gfx_scratch_free(adev, scratch);
679         return r;
680 }
681
682 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
683 {
684         struct amdgpu_device *adev = ring->adev;
685         struct amdgpu_ib ib;
686         struct fence *f = NULL;
687         uint32_t scratch;
688         uint32_t tmp = 0;
689         unsigned i;
690         int r;
691
692         r = amdgpu_gfx_scratch_get(adev, &scratch);
693         if (r) {
694                 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
695                 return r;
696         }
697         WREG32(scratch, 0xCAFEDEAD);
698         memset(&ib, 0, sizeof(ib));
699         r = amdgpu_ib_get(adev, NULL, 256, &ib);
700         if (r) {
701                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
702                 goto err1;
703         }
704         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
705         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
706         ib.ptr[2] = 0xDEADBEEF;
707         ib.length_dw = 3;
708
709         r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
710                                NULL, &f);
711         if (r)
712                 goto err2;
713
714         r = fence_wait(f, false);
715         if (r) {
716                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
717                 goto err2;
718         }
719         for (i = 0; i < adev->usec_timeout; i++) {
720                 tmp = RREG32(scratch);
721                 if (tmp == 0xDEADBEEF)
722                         break;
723                 DRM_UDELAY(1);
724         }
725         if (i < adev->usec_timeout) {
726                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
727                          ring->idx, i);
728                 goto err2;
729         } else {
730                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
731                           scratch, tmp);
732                 r = -EINVAL;
733         }
734 err2:
735         fence_put(f);
736         amdgpu_ib_free(adev, &ib);
737 err1:
738         amdgpu_gfx_scratch_free(adev, scratch);
739         return r;
740 }
741
742 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
743 {
744         const char *chip_name;
745         char fw_name[30];
746         int err;
747         struct amdgpu_firmware_info *info = NULL;
748         const struct common_firmware_header *header = NULL;
749         const struct gfx_firmware_header_v1_0 *cp_hdr;
750
751         DRM_DEBUG("\n");
752
753         switch (adev->asic_type) {
754         case CHIP_TOPAZ:
755                 chip_name = "topaz";
756                 break;
757         case CHIP_TONGA:
758                 chip_name = "tonga";
759                 break;
760         case CHIP_CARRIZO:
761                 chip_name = "carrizo";
762                 break;
763         case CHIP_FIJI:
764                 chip_name = "fiji";
765                 break;
766         case CHIP_STONEY:
767                 chip_name = "stoney";
768                 break;
769         default:
770                 BUG();
771         }
772
773         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
774         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
775         if (err)
776                 goto out;
777         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
778         if (err)
779                 goto out;
780         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
781         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
782         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
783
784         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
785         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
786         if (err)
787                 goto out;
788         err = amdgpu_ucode_validate(adev->gfx.me_fw);
789         if (err)
790                 goto out;
791         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
792         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
793         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
794
795         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
796         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
797         if (err)
798                 goto out;
799         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
800         if (err)
801                 goto out;
802         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
803         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
804         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
805
806         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
807         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
808         if (err)
809                 goto out;
810         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
811         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
812         adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
813         adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
814
815         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
816         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
817         if (err)
818                 goto out;
819         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
820         if (err)
821                 goto out;
822         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
823         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
824         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
825
826         if ((adev->asic_type != CHIP_STONEY) &&
827             (adev->asic_type != CHIP_TOPAZ)) {
828                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
829                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
830                 if (!err) {
831                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
832                         if (err)
833                                 goto out;
834                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
835                                 adev->gfx.mec2_fw->data;
836                         adev->gfx.mec2_fw_version =
837                                 le32_to_cpu(cp_hdr->header.ucode_version);
838                         adev->gfx.mec2_feature_version =
839                                 le32_to_cpu(cp_hdr->ucode_feature_version);
840                 } else {
841                         err = 0;
842                         adev->gfx.mec2_fw = NULL;
843                 }
844         }
845
846         if (adev->firmware.smu_load) {
847                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
848                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
849                 info->fw = adev->gfx.pfp_fw;
850                 header = (const struct common_firmware_header *)info->fw->data;
851                 adev->firmware.fw_size +=
852                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
853
854                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
855                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
856                 info->fw = adev->gfx.me_fw;
857                 header = (const struct common_firmware_header *)info->fw->data;
858                 adev->firmware.fw_size +=
859                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
860
861                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
862                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
863                 info->fw = adev->gfx.ce_fw;
864                 header = (const struct common_firmware_header *)info->fw->data;
865                 adev->firmware.fw_size +=
866                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
867
868                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
869                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
870                 info->fw = adev->gfx.rlc_fw;
871                 header = (const struct common_firmware_header *)info->fw->data;
872                 adev->firmware.fw_size +=
873                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
874
875                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
876                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
877                 info->fw = adev->gfx.mec_fw;
878                 header = (const struct common_firmware_header *)info->fw->data;
879                 adev->firmware.fw_size +=
880                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
881
882                 if (adev->gfx.mec2_fw) {
883                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
884                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
885                         info->fw = adev->gfx.mec2_fw;
886                         header = (const struct common_firmware_header *)info->fw->data;
887                         adev->firmware.fw_size +=
888                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
889                 }
890
891         }
892
893 out:
894         if (err) {
895                 dev_err(adev->dev,
896                         "gfx8: Failed to load firmware \"%s\"\n",
897                         fw_name);
898                 release_firmware(adev->gfx.pfp_fw);
899                 adev->gfx.pfp_fw = NULL;
900                 release_firmware(adev->gfx.me_fw);
901                 adev->gfx.me_fw = NULL;
902                 release_firmware(adev->gfx.ce_fw);
903                 adev->gfx.ce_fw = NULL;
904                 release_firmware(adev->gfx.rlc_fw);
905                 adev->gfx.rlc_fw = NULL;
906                 release_firmware(adev->gfx.mec_fw);
907                 adev->gfx.mec_fw = NULL;
908                 release_firmware(adev->gfx.mec2_fw);
909                 adev->gfx.mec2_fw = NULL;
910         }
911         return err;
912 }
913
914 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
915 {
916         int r;
917
918         if (adev->gfx.mec.hpd_eop_obj) {
919                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
920                 if (unlikely(r != 0))
921                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
922                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
923                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
924
925                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
926                 adev->gfx.mec.hpd_eop_obj = NULL;
927         }
928 }
929
930 #define MEC_HPD_SIZE 2048
931
932 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
933 {
934         int r;
935         u32 *hpd;
936
937         /*
938          * we assign only 1 pipe because all other pipes will
939          * be handled by KFD
940          */
941         adev->gfx.mec.num_mec = 1;
942         adev->gfx.mec.num_pipe = 1;
943         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
944
945         if (adev->gfx.mec.hpd_eop_obj == NULL) {
946                 r = amdgpu_bo_create(adev,
947                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
948                                      PAGE_SIZE, true,
949                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
950                                      &adev->gfx.mec.hpd_eop_obj);
951                 if (r) {
952                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
953                         return r;
954                 }
955         }
956
957         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
958         if (unlikely(r != 0)) {
959                 gfx_v8_0_mec_fini(adev);
960                 return r;
961         }
962         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
963                           &adev->gfx.mec.hpd_eop_gpu_addr);
964         if (r) {
965                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
966                 gfx_v8_0_mec_fini(adev);
967                 return r;
968         }
969         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
970         if (r) {
971                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
972                 gfx_v8_0_mec_fini(adev);
973                 return r;
974         }
975
976         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
977
978         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
979         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
980
981         return 0;
982 }
983
984 static const u32 vgpr_init_compute_shader[] =
985 {
986         0x7e000209, 0x7e020208,
987         0x7e040207, 0x7e060206,
988         0x7e080205, 0x7e0a0204,
989         0x7e0c0203, 0x7e0e0202,
990         0x7e100201, 0x7e120200,
991         0x7e140209, 0x7e160208,
992         0x7e180207, 0x7e1a0206,
993         0x7e1c0205, 0x7e1e0204,
994         0x7e200203, 0x7e220202,
995         0x7e240201, 0x7e260200,
996         0x7e280209, 0x7e2a0208,
997         0x7e2c0207, 0x7e2e0206,
998         0x7e300205, 0x7e320204,
999         0x7e340203, 0x7e360202,
1000         0x7e380201, 0x7e3a0200,
1001         0x7e3c0209, 0x7e3e0208,
1002         0x7e400207, 0x7e420206,
1003         0x7e440205, 0x7e460204,
1004         0x7e480203, 0x7e4a0202,
1005         0x7e4c0201, 0x7e4e0200,
1006         0x7e500209, 0x7e520208,
1007         0x7e540207, 0x7e560206,
1008         0x7e580205, 0x7e5a0204,
1009         0x7e5c0203, 0x7e5e0202,
1010         0x7e600201, 0x7e620200,
1011         0x7e640209, 0x7e660208,
1012         0x7e680207, 0x7e6a0206,
1013         0x7e6c0205, 0x7e6e0204,
1014         0x7e700203, 0x7e720202,
1015         0x7e740201, 0x7e760200,
1016         0x7e780209, 0x7e7a0208,
1017         0x7e7c0207, 0x7e7e0206,
1018         0xbf8a0000, 0xbf810000,
1019 };
1020
1021 static const u32 sgpr_init_compute_shader[] =
1022 {
1023         0xbe8a0100, 0xbe8c0102,
1024         0xbe8e0104, 0xbe900106,
1025         0xbe920108, 0xbe940100,
1026         0xbe960102, 0xbe980104,
1027         0xbe9a0106, 0xbe9c0108,
1028         0xbe9e0100, 0xbea00102,
1029         0xbea20104, 0xbea40106,
1030         0xbea60108, 0xbea80100,
1031         0xbeaa0102, 0xbeac0104,
1032         0xbeae0106, 0xbeb00108,
1033         0xbeb20100, 0xbeb40102,
1034         0xbeb60104, 0xbeb80106,
1035         0xbeba0108, 0xbebc0100,
1036         0xbebe0102, 0xbec00104,
1037         0xbec20106, 0xbec40108,
1038         0xbec60100, 0xbec80102,
1039         0xbee60004, 0xbee70005,
1040         0xbeea0006, 0xbeeb0007,
1041         0xbee80008, 0xbee90009,
1042         0xbefc0000, 0xbf8a0000,
1043         0xbf810000, 0x00000000,
1044 };
1045
1046 static const u32 vgpr_init_regs[] =
1047 {
1048         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1049         mmCOMPUTE_RESOURCE_LIMITS, 0,
1050         mmCOMPUTE_NUM_THREAD_X, 256*4,
1051         mmCOMPUTE_NUM_THREAD_Y, 1,
1052         mmCOMPUTE_NUM_THREAD_Z, 1,
1053         mmCOMPUTE_PGM_RSRC2, 20,
1054         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1055         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1056         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1057         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1058         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1059         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1060         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1061         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1062         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1063         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1064 };
1065
1066 static const u32 sgpr1_init_regs[] =
1067 {
1068         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1069         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1070         mmCOMPUTE_NUM_THREAD_X, 256*5,
1071         mmCOMPUTE_NUM_THREAD_Y, 1,
1072         mmCOMPUTE_NUM_THREAD_Z, 1,
1073         mmCOMPUTE_PGM_RSRC2, 20,
1074         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1075         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1076         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1077         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1078         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1079         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1080         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1081         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1082         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1083         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1084 };
1085
1086 static const u32 sgpr2_init_regs[] =
1087 {
1088         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1089         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1090         mmCOMPUTE_NUM_THREAD_X, 256*5,
1091         mmCOMPUTE_NUM_THREAD_Y, 1,
1092         mmCOMPUTE_NUM_THREAD_Z, 1,
1093         mmCOMPUTE_PGM_RSRC2, 20,
1094         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1095         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1096         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1097         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1098         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1099         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1100         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1101         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1102         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1103         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1104 };
1105
1106 static const u32 sec_ded_counter_registers[] =
1107 {
1108         mmCPC_EDC_ATC_CNT,
1109         mmCPC_EDC_SCRATCH_CNT,
1110         mmCPC_EDC_UCODE_CNT,
1111         mmCPF_EDC_ATC_CNT,
1112         mmCPF_EDC_ROQ_CNT,
1113         mmCPF_EDC_TAG_CNT,
1114         mmCPG_EDC_ATC_CNT,
1115         mmCPG_EDC_DMA_CNT,
1116         mmCPG_EDC_TAG_CNT,
1117         mmDC_EDC_CSINVOC_CNT,
1118         mmDC_EDC_RESTORE_CNT,
1119         mmDC_EDC_STATE_CNT,
1120         mmGDS_EDC_CNT,
1121         mmGDS_EDC_GRBM_CNT,
1122         mmGDS_EDC_OA_DED,
1123         mmSPI_EDC_CNT,
1124         mmSQC_ATC_EDC_GATCL1_CNT,
1125         mmSQC_EDC_CNT,
1126         mmSQ_EDC_DED_CNT,
1127         mmSQ_EDC_INFO,
1128         mmSQ_EDC_SEC_CNT,
1129         mmTCC_EDC_CNT,
1130         mmTCP_ATC_EDC_GATCL1_CNT,
1131         mmTCP_EDC_CNT,
1132         mmTD_EDC_CNT
1133 };
1134
1135 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1136 {
1137         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1138         struct amdgpu_ib ib;
1139         struct fence *f = NULL;
1140         int r, i;
1141         u32 tmp;
1142         unsigned total_size, vgpr_offset, sgpr_offset;
1143         u64 gpu_addr;
1144
1145         /* only supported on CZ */
1146         if (adev->asic_type != CHIP_CARRIZO)
1147                 return 0;
1148
1149         /* bail if the compute ring is not ready */
1150         if (!ring->ready)
1151                 return 0;
1152
1153         tmp = RREG32(mmGB_EDC_MODE);
1154         WREG32(mmGB_EDC_MODE, 0);
1155
1156         total_size =
1157                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1158         total_size +=
1159                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1160         total_size +=
1161                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1162         total_size = ALIGN(total_size, 256);
1163         vgpr_offset = total_size;
1164         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1165         sgpr_offset = total_size;
1166         total_size += sizeof(sgpr_init_compute_shader);
1167
1168         /* allocate an indirect buffer to put the commands in */
1169         memset(&ib, 0, sizeof(ib));
1170         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1171         if (r) {
1172                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1173                 return r;
1174         }
1175
1176         /* load the compute shaders */
1177         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1178                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1179
1180         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1181                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1182
1183         /* init the ib length to 0 */
1184         ib.length_dw = 0;
1185
1186         /* VGPR */
1187         /* write the register state for the compute dispatch */
1188         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1189                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1190                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1191                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1192         }
1193         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1194         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1195         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1196         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1197         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1198         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1199
1200         /* write dispatch packet */
1201         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1202         ib.ptr[ib.length_dw++] = 8; /* x */
1203         ib.ptr[ib.length_dw++] = 1; /* y */
1204         ib.ptr[ib.length_dw++] = 1; /* z */
1205         ib.ptr[ib.length_dw++] =
1206                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1207
1208         /* write CS partial flush packet */
1209         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1210         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1211
1212         /* SGPR1 */
1213         /* write the register state for the compute dispatch */
1214         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1215                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1216                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1217                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1218         }
1219         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1220         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1221         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1222         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1223         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1224         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1225
1226         /* write dispatch packet */
1227         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1228         ib.ptr[ib.length_dw++] = 8; /* x */
1229         ib.ptr[ib.length_dw++] = 1; /* y */
1230         ib.ptr[ib.length_dw++] = 1; /* z */
1231         ib.ptr[ib.length_dw++] =
1232                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1233
1234         /* write CS partial flush packet */
1235         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1236         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1237
1238         /* SGPR2 */
1239         /* write the register state for the compute dispatch */
1240         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1241                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1242                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1243                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1244         }
1245         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1246         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1247         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1248         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1249         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1250         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1251
1252         /* write dispatch packet */
1253         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1254         ib.ptr[ib.length_dw++] = 8; /* x */
1255         ib.ptr[ib.length_dw++] = 1; /* y */
1256         ib.ptr[ib.length_dw++] = 1; /* z */
1257         ib.ptr[ib.length_dw++] =
1258                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1259
1260         /* write CS partial flush packet */
1261         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1262         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1263
1264         /* shedule the ib on the ring */
1265         r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
1266                                NULL, &f);
1267         if (r) {
1268                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1269                 goto fail;
1270         }
1271
1272         /* wait for the GPU to finish processing the IB */
1273         r = fence_wait(f, false);
1274         if (r) {
1275                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1276                 goto fail;
1277         }
1278
1279         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1280         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1281         WREG32(mmGB_EDC_MODE, tmp);
1282
1283         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1284         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1285         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1286
1287
1288         /* read back registers to clear the counters */
1289         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1290                 RREG32(sec_ded_counter_registers[i]);
1291
1292 fail:
1293         fence_put(f);
1294         amdgpu_ib_free(adev, &ib);
1295
1296         return r;
1297 }
1298
1299 static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1300 {
1301         u32 gb_addr_config;
1302         u32 mc_shared_chmap, mc_arb_ramcfg;
1303         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1304         u32 tmp;
1305
1306         switch (adev->asic_type) {
1307         case CHIP_TOPAZ:
1308                 adev->gfx.config.max_shader_engines = 1;
1309                 adev->gfx.config.max_tile_pipes = 2;
1310                 adev->gfx.config.max_cu_per_sh = 6;
1311                 adev->gfx.config.max_sh_per_se = 1;
1312                 adev->gfx.config.max_backends_per_se = 2;
1313                 adev->gfx.config.max_texture_channel_caches = 2;
1314                 adev->gfx.config.max_gprs = 256;
1315                 adev->gfx.config.max_gs_threads = 32;
1316                 adev->gfx.config.max_hw_contexts = 8;
1317
1318                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1319                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1320                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1321                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1322                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1323                 break;
1324         case CHIP_FIJI:
1325                 adev->gfx.config.max_shader_engines = 4;
1326                 adev->gfx.config.max_tile_pipes = 16;
1327                 adev->gfx.config.max_cu_per_sh = 16;
1328                 adev->gfx.config.max_sh_per_se = 1;
1329                 adev->gfx.config.max_backends_per_se = 4;
1330                 adev->gfx.config.max_texture_channel_caches = 16;
1331                 adev->gfx.config.max_gprs = 256;
1332                 adev->gfx.config.max_gs_threads = 32;
1333                 adev->gfx.config.max_hw_contexts = 8;
1334
1335                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1336                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1337                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1338                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1339                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1340                 break;
1341         case CHIP_TONGA:
1342                 adev->gfx.config.max_shader_engines = 4;
1343                 adev->gfx.config.max_tile_pipes = 8;
1344                 adev->gfx.config.max_cu_per_sh = 8;
1345                 adev->gfx.config.max_sh_per_se = 1;
1346                 adev->gfx.config.max_backends_per_se = 2;
1347                 adev->gfx.config.max_texture_channel_caches = 8;
1348                 adev->gfx.config.max_gprs = 256;
1349                 adev->gfx.config.max_gs_threads = 32;
1350                 adev->gfx.config.max_hw_contexts = 8;
1351
1352                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1353                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1354                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1355                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1356                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1357                 break;
1358         case CHIP_CARRIZO:
1359                 adev->gfx.config.max_shader_engines = 1;
1360                 adev->gfx.config.max_tile_pipes = 2;
1361                 adev->gfx.config.max_sh_per_se = 1;
1362                 adev->gfx.config.max_backends_per_se = 2;
1363
1364                 switch (adev->pdev->revision) {
1365                 case 0xc4:
1366                 case 0x84:
1367                 case 0xc8:
1368                 case 0xcc:
1369                 case 0xe1:
1370                 case 0xe3:
1371                         /* B10 */
1372                         adev->gfx.config.max_cu_per_sh = 8;
1373                         break;
1374                 case 0xc5:
1375                 case 0x81:
1376                 case 0x85:
1377                 case 0xc9:
1378                 case 0xcd:
1379                 case 0xe2:
1380                 case 0xe4:
1381                         /* B8 */
1382                         adev->gfx.config.max_cu_per_sh = 6;
1383                         break;
1384                 case 0xc6:
1385                 case 0xca:
1386                 case 0xce:
1387                 case 0x88:
1388                         /* B6 */
1389                         adev->gfx.config.max_cu_per_sh = 6;
1390                         break;
1391                 case 0xc7:
1392                 case 0x87:
1393                 case 0xcb:
1394                 case 0xe5:
1395                 case 0x89:
1396                 default:
1397                         /* B4 */
1398                         adev->gfx.config.max_cu_per_sh = 4;
1399                         break;
1400                 }
1401
1402                 adev->gfx.config.max_texture_channel_caches = 2;
1403                 adev->gfx.config.max_gprs = 256;
1404                 adev->gfx.config.max_gs_threads = 32;
1405                 adev->gfx.config.max_hw_contexts = 8;
1406
1407                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1408                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1409                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1410                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1411                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1412                 break;
1413         case CHIP_STONEY:
1414                 adev->gfx.config.max_shader_engines = 1;
1415                 adev->gfx.config.max_tile_pipes = 2;
1416                 adev->gfx.config.max_sh_per_se = 1;
1417                 adev->gfx.config.max_backends_per_se = 1;
1418
1419                 switch (adev->pdev->revision) {
1420                 case 0xc0:
1421                 case 0xc1:
1422                 case 0xc2:
1423                 case 0xc4:
1424                 case 0xc8:
1425                 case 0xc9:
1426                         adev->gfx.config.max_cu_per_sh = 3;
1427                         break;
1428                 case 0xd0:
1429                 case 0xd1:
1430                 case 0xd2:
1431                 default:
1432                         adev->gfx.config.max_cu_per_sh = 2;
1433                         break;
1434                 }
1435
1436                 adev->gfx.config.max_texture_channel_caches = 2;
1437                 adev->gfx.config.max_gprs = 256;
1438                 adev->gfx.config.max_gs_threads = 16;
1439                 adev->gfx.config.max_hw_contexts = 8;
1440
1441                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1442                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1443                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1444                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1445                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1446                 break;
1447         default:
1448                 adev->gfx.config.max_shader_engines = 2;
1449                 adev->gfx.config.max_tile_pipes = 4;
1450                 adev->gfx.config.max_cu_per_sh = 2;
1451                 adev->gfx.config.max_sh_per_se = 1;
1452                 adev->gfx.config.max_backends_per_se = 2;
1453                 adev->gfx.config.max_texture_channel_caches = 4;
1454                 adev->gfx.config.max_gprs = 256;
1455                 adev->gfx.config.max_gs_threads = 32;
1456                 adev->gfx.config.max_hw_contexts = 8;
1457
1458                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1459                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1460                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1461                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1462                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1463                 break;
1464         }
1465
1466         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1467         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1468         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1469
1470         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1471         adev->gfx.config.mem_max_burst_length_bytes = 256;
1472         if (adev->flags & AMD_IS_APU) {
1473                 /* Get memory bank mapping mode. */
1474                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1475                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1476                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1477
1478                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1479                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1480                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1481
1482                 /* Validate settings in case only one DIMM installed. */
1483                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1484                         dimm00_addr_map = 0;
1485                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1486                         dimm01_addr_map = 0;
1487                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1488                         dimm10_addr_map = 0;
1489                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1490                         dimm11_addr_map = 0;
1491
1492                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1493                 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1494                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1495                         adev->gfx.config.mem_row_size_in_kb = 2;
1496                 else
1497                         adev->gfx.config.mem_row_size_in_kb = 1;
1498         } else {
1499                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1500                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1501                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1502                         adev->gfx.config.mem_row_size_in_kb = 4;
1503         }
1504
1505         adev->gfx.config.shader_engine_tile_size = 32;
1506         adev->gfx.config.num_gpus = 1;
1507         adev->gfx.config.multi_gpu_tile_size = 64;
1508
1509         /* fix up row size */
1510         switch (adev->gfx.config.mem_row_size_in_kb) {
1511         case 1:
1512         default:
1513                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1514                 break;
1515         case 2:
1516                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1517                 break;
1518         case 4:
1519                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1520                 break;
1521         }
1522         adev->gfx.config.gb_addr_config = gb_addr_config;
1523 }
1524
1525 static int gfx_v8_0_sw_init(void *handle)
1526 {
1527         int i, r;
1528         struct amdgpu_ring *ring;
1529         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1530
1531         /* EOP Event */
1532         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1533         if (r)
1534                 return r;
1535
1536         /* Privileged reg */
1537         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1538         if (r)
1539                 return r;
1540
1541         /* Privileged inst */
1542         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1543         if (r)
1544                 return r;
1545
1546         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1547
1548         gfx_v8_0_scratch_init(adev);
1549
1550         r = gfx_v8_0_init_microcode(adev);
1551         if (r) {
1552                 DRM_ERROR("Failed to load gfx firmware!\n");
1553                 return r;
1554         }
1555
1556         r = gfx_v8_0_mec_init(adev);
1557         if (r) {
1558                 DRM_ERROR("Failed to init MEC BOs!\n");
1559                 return r;
1560         }
1561
1562         /* set up the gfx ring */
1563         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1564                 ring = &adev->gfx.gfx_ring[i];
1565                 ring->ring_obj = NULL;
1566                 sprintf(ring->name, "gfx");
1567                 /* no gfx doorbells on iceland */
1568                 if (adev->asic_type != CHIP_TOPAZ) {
1569                         ring->use_doorbell = true;
1570                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1571                 }
1572
1573                 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1574                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1575                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1576                                      AMDGPU_RING_TYPE_GFX);
1577                 if (r)
1578                         return r;
1579         }
1580
1581         /* set up the compute queues */
1582         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1583                 unsigned irq_type;
1584
1585                 /* max 32 queues per MEC */
1586                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1587                         DRM_ERROR("Too many (%d) compute rings!\n", i);
1588                         break;
1589                 }
1590                 ring = &adev->gfx.compute_ring[i];
1591                 ring->ring_obj = NULL;
1592                 ring->use_doorbell = true;
1593                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1594                 ring->me = 1; /* first MEC */
1595                 ring->pipe = i / 8;
1596                 ring->queue = i % 8;
1597                 sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1598                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1599                 /* type-2 packets are deprecated on MEC, use type-3 instead */
1600                 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1601                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1602                                      &adev->gfx.eop_irq, irq_type,
1603                                      AMDGPU_RING_TYPE_COMPUTE);
1604                 if (r)
1605                         return r;
1606         }
1607
1608         /* reserve GDS, GWS and OA resource for gfx */
1609         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1610                         PAGE_SIZE, true,
1611                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1612                         NULL, &adev->gds.gds_gfx_bo);
1613         if (r)
1614                 return r;
1615
1616         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1617                 PAGE_SIZE, true,
1618                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1619                 NULL, &adev->gds.gws_gfx_bo);
1620         if (r)
1621                 return r;
1622
1623         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1624                         PAGE_SIZE, true,
1625                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1626                         NULL, &adev->gds.oa_gfx_bo);
1627         if (r)
1628                 return r;
1629
1630         adev->gfx.ce_ram_size = 0x8000;
1631
1632         gfx_v8_0_gpu_early_init(adev);
1633
1634         return 0;
1635 }
1636
1637 static int gfx_v8_0_sw_fini(void *handle)
1638 {
1639         int i;
1640         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1641
1642         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1643         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1644         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1645
1646         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1647                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1648         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1649                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1650
1651         gfx_v8_0_mec_fini(adev);
1652
1653         return 0;
1654 }
1655
1656 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1657 {
1658         uint32_t *modearray, *mod2array;
1659         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1660         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1661         u32 reg_offset;
1662
1663         modearray = adev->gfx.config.tile_mode_array;
1664         mod2array = adev->gfx.config.macrotile_mode_array;
1665
1666         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1667                 modearray[reg_offset] = 0;
1668
1669         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
1670                 mod2array[reg_offset] = 0;
1671
1672         switch (adev->asic_type) {
1673         case CHIP_TOPAZ:
1674                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1675                                 PIPE_CONFIG(ADDR_SURF_P2) |
1676                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1677                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1678                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1679                                 PIPE_CONFIG(ADDR_SURF_P2) |
1680                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1681                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1682                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1683                                 PIPE_CONFIG(ADDR_SURF_P2) |
1684                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1685                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1686                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1687                                 PIPE_CONFIG(ADDR_SURF_P2) |
1688                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1689                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1690                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1691                                 PIPE_CONFIG(ADDR_SURF_P2) |
1692                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1693                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1694                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1695                                 PIPE_CONFIG(ADDR_SURF_P2) |
1696                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1697                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1698                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1699                                 PIPE_CONFIG(ADDR_SURF_P2) |
1700                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1701                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1702                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1703                                 PIPE_CONFIG(ADDR_SURF_P2));
1704                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1705                                 PIPE_CONFIG(ADDR_SURF_P2) |
1706                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1707                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1708                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1709                                  PIPE_CONFIG(ADDR_SURF_P2) |
1710                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1711                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1712                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1713                                  PIPE_CONFIG(ADDR_SURF_P2) |
1714                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1715                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1716                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1717                                  PIPE_CONFIG(ADDR_SURF_P2) |
1718                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1719                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1720                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1721                                  PIPE_CONFIG(ADDR_SURF_P2) |
1722                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1723                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1724                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1725                                  PIPE_CONFIG(ADDR_SURF_P2) |
1726                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1727                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1728                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1729                                  PIPE_CONFIG(ADDR_SURF_P2) |
1730                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1731                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1732                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1733                                  PIPE_CONFIG(ADDR_SURF_P2) |
1734                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1735                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1736                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1737                                  PIPE_CONFIG(ADDR_SURF_P2) |
1738                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1739                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1740                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1741                                  PIPE_CONFIG(ADDR_SURF_P2) |
1742                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1743                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1744                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1745                                  PIPE_CONFIG(ADDR_SURF_P2) |
1746                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1747                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1748                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1749                                  PIPE_CONFIG(ADDR_SURF_P2) |
1750                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1751                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1752                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1753                                  PIPE_CONFIG(ADDR_SURF_P2) |
1754                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1755                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1756                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1757                                  PIPE_CONFIG(ADDR_SURF_P2) |
1758                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1759                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1760                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1761                                  PIPE_CONFIG(ADDR_SURF_P2) |
1762                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1763                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1764                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1765                                  PIPE_CONFIG(ADDR_SURF_P2) |
1766                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1767                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1768                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1769                                  PIPE_CONFIG(ADDR_SURF_P2) |
1770                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1771                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1772                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1773                                  PIPE_CONFIG(ADDR_SURF_P2) |
1774                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1775                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1776
1777                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1778                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1779                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1780                                 NUM_BANKS(ADDR_SURF_8_BANK));
1781                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1782                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1783                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1784                                 NUM_BANKS(ADDR_SURF_8_BANK));
1785                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1786                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1787                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1788                                 NUM_BANKS(ADDR_SURF_8_BANK));
1789                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1790                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1791                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1792                                 NUM_BANKS(ADDR_SURF_8_BANK));
1793                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1794                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1795                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1796                                 NUM_BANKS(ADDR_SURF_8_BANK));
1797                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1798                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1799                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1800                                 NUM_BANKS(ADDR_SURF_8_BANK));
1801                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1802                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1803                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1804                                 NUM_BANKS(ADDR_SURF_8_BANK));
1805                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1806                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1807                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1808                                 NUM_BANKS(ADDR_SURF_16_BANK));
1809                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1810                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1811                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1812                                 NUM_BANKS(ADDR_SURF_16_BANK));
1813                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1814                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1815                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1816                                  NUM_BANKS(ADDR_SURF_16_BANK));
1817                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1818                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1819                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1820                                  NUM_BANKS(ADDR_SURF_16_BANK));
1821                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1822                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1823                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1824                                  NUM_BANKS(ADDR_SURF_16_BANK));
1825                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1826                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1827                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1828                                  NUM_BANKS(ADDR_SURF_16_BANK));
1829                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1830                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1831                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1832                                  NUM_BANKS(ADDR_SURF_8_BANK));
1833
1834                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1835                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1836                             reg_offset != 23)
1837                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1838
1839                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1840                         if (reg_offset != 7)
1841                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1842
1843                 break;
1844         case CHIP_FIJI:
1845                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1846                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1847                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1848                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1849                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1850                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1851                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1852                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1853                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1854                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1855                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1856                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1857                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1858                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1859                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1860                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1861                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1862                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1863                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1864                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1865                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1866                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1867                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1868                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1869                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1870                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1871                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1872                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1873                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1874                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1875                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1876                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1877                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1878                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1879                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1880                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1882                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1883                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1884                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1885                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1886                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1887                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1888                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1889                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1890                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1891                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1892                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1893                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1894                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1895                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1896                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1897                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1898                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1899                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1900                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1901                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1902                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1903                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1904                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1905                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1906                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1907                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1908                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1909                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1910                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1911                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1912                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1913                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1914                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1915                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1916                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1917                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1918                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1919                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1920                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1921                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1922                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1923                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1924                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1925                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1926                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1927                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1928                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1929                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1930                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1931                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1932                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1933                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1934                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1935                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1936                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1937                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1938                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1939                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1940                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1941                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1942                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1943                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1944                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1945                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1946                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1947                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1948                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1949                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1950                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1951                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1952                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1953                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1954                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1955                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1956                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1957                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1958                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1959                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1960                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1961                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1962                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1963                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1964                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1965                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1966                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1967
1968                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1969                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1970                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1971                                 NUM_BANKS(ADDR_SURF_8_BANK));
1972                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1973                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1974                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1975                                 NUM_BANKS(ADDR_SURF_8_BANK));
1976                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1977                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1978                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1979                                 NUM_BANKS(ADDR_SURF_8_BANK));
1980                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1981                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1982                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1983                                 NUM_BANKS(ADDR_SURF_8_BANK));
1984                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1985                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1986                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1987                                 NUM_BANKS(ADDR_SURF_8_BANK));
1988                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1989                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1990                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1991                                 NUM_BANKS(ADDR_SURF_8_BANK));
1992                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1993                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1994                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1995                                 NUM_BANKS(ADDR_SURF_8_BANK));
1996                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1997                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1998                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1999                                 NUM_BANKS(ADDR_SURF_8_BANK));
2000                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2001                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2002                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2003                                 NUM_BANKS(ADDR_SURF_8_BANK));
2004                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2005                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2006                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2007                                  NUM_BANKS(ADDR_SURF_8_BANK));
2008                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2009                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2010                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2011                                  NUM_BANKS(ADDR_SURF_8_BANK));
2012                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2013                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2014                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2015                                  NUM_BANKS(ADDR_SURF_8_BANK));
2016                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2017                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2018                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2019                                  NUM_BANKS(ADDR_SURF_8_BANK));
2020                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2021                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2022                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2023                                  NUM_BANKS(ADDR_SURF_4_BANK));
2024
2025                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2026                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2027
2028                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2029                         if (reg_offset != 7)
2030                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2031
2032                 break;
2033         case CHIP_TONGA:
2034                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2035                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2036                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2037                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2038                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2039                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2040                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2041                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2042                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2043                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2044                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2045                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2046                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2047                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2048                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2049                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2050                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2051                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2052                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2053                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2054                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2055                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2056                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2057                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2058                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2059                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2060                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2061                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2062                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2063                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2064                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2065                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2066                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2067                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2068                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2069                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2070                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2071                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2072                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2073                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2074                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2075                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2076                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2077                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2078                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2079                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2080                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2081                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2082                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2083                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2084                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2085                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2086                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2087                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2088                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2089                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2090                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2091                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2093                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2094                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2095                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2096                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2097                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2098                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2099                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2100                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2101                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2102                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2103                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2104                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2105                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2106                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2107                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2108                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2109                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2110                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2111                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2112                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2113                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2114                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2115                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2116                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2117                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2118                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2119                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2120                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2121                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2122                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2123                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2124                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2125                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2126                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2127                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2128                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2129                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2130                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2131                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2132                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2133                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2134                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2135                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2136                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2137                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2138                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2139                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2140                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2141                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2142                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2143                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2144                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2146                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2147                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2148                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2149                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2150                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2151                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2152                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2153                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2156
2157                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2158                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2159                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2160                                 NUM_BANKS(ADDR_SURF_16_BANK));
2161                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2163                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2164                                 NUM_BANKS(ADDR_SURF_16_BANK));
2165                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2166                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2167                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2168                                 NUM_BANKS(ADDR_SURF_16_BANK));
2169                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2170                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2171                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2172                                 NUM_BANKS(ADDR_SURF_16_BANK));
2173                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176                                 NUM_BANKS(ADDR_SURF_16_BANK));
2177                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2178                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2179                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2180                                 NUM_BANKS(ADDR_SURF_16_BANK));
2181                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2182                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2183                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2184                                 NUM_BANKS(ADDR_SURF_16_BANK));
2185                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2187                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2188                                 NUM_BANKS(ADDR_SURF_16_BANK));
2189                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2190                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2191                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2192                                 NUM_BANKS(ADDR_SURF_16_BANK));
2193                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2194                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2195                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2196                                  NUM_BANKS(ADDR_SURF_16_BANK));
2197                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2200                                  NUM_BANKS(ADDR_SURF_16_BANK));
2201                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2202                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2203                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2204                                  NUM_BANKS(ADDR_SURF_8_BANK));
2205                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2206                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2207                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2208                                  NUM_BANKS(ADDR_SURF_4_BANK));
2209                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2210                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2211                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2212                                  NUM_BANKS(ADDR_SURF_4_BANK));
2213
2214                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2215                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2216
2217                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2218                         if (reg_offset != 7)
2219                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2220
2221                 break;
2222         case CHIP_STONEY:
2223                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224                                 PIPE_CONFIG(ADDR_SURF_P2) |
2225                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2226                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2227                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2228                                 PIPE_CONFIG(ADDR_SURF_P2) |
2229                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2230                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2231                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2232                                 PIPE_CONFIG(ADDR_SURF_P2) |
2233                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2234                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2235                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2236                                 PIPE_CONFIG(ADDR_SURF_P2) |
2237                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2238                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2239                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2240                                 PIPE_CONFIG(ADDR_SURF_P2) |
2241                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2242                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2243                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2244                                 PIPE_CONFIG(ADDR_SURF_P2) |
2245                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2246                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2247                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2248                                 PIPE_CONFIG(ADDR_SURF_P2) |
2249                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2250                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2251                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2252                                 PIPE_CONFIG(ADDR_SURF_P2));
2253                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2254                                 PIPE_CONFIG(ADDR_SURF_P2) |
2255                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2256                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2257                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258                                  PIPE_CONFIG(ADDR_SURF_P2) |
2259                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2260                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2261                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2262                                  PIPE_CONFIG(ADDR_SURF_P2) |
2263                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2264                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2265                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2266                                  PIPE_CONFIG(ADDR_SURF_P2) |
2267                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2268                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2269                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270                                  PIPE_CONFIG(ADDR_SURF_P2) |
2271                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2272                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2273                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2274                                  PIPE_CONFIG(ADDR_SURF_P2) |
2275                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2276                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2277                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2278                                  PIPE_CONFIG(ADDR_SURF_P2) |
2279                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2280                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2281                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2282                                  PIPE_CONFIG(ADDR_SURF_P2) |
2283                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2284                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2285                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2286                                  PIPE_CONFIG(ADDR_SURF_P2) |
2287                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2288                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2289                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2290                                  PIPE_CONFIG(ADDR_SURF_P2) |
2291                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2292                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2293                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2294                                  PIPE_CONFIG(ADDR_SURF_P2) |
2295                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2296                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2297                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2298                                  PIPE_CONFIG(ADDR_SURF_P2) |
2299                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2300                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2301                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2302                                  PIPE_CONFIG(ADDR_SURF_P2) |
2303                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2304                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2305                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2306                                  PIPE_CONFIG(ADDR_SURF_P2) |
2307                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2308                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2309                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2310                                  PIPE_CONFIG(ADDR_SURF_P2) |
2311                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2312                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2313                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2314                                  PIPE_CONFIG(ADDR_SURF_P2) |
2315                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2316                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2317                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318                                  PIPE_CONFIG(ADDR_SURF_P2) |
2319                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2320                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2322                                  PIPE_CONFIG(ADDR_SURF_P2) |
2323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2325
2326                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2327                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2328                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2329                                 NUM_BANKS(ADDR_SURF_8_BANK));
2330                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2331                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2332                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2333                                 NUM_BANKS(ADDR_SURF_8_BANK));
2334                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2336                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2337                                 NUM_BANKS(ADDR_SURF_8_BANK));
2338                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2339                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2340                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2341                                 NUM_BANKS(ADDR_SURF_8_BANK));
2342                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2343                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2344                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2345                                 NUM_BANKS(ADDR_SURF_8_BANK));
2346                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2347                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2348                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2349                                 NUM_BANKS(ADDR_SURF_8_BANK));
2350                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2351                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2352                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2353                                 NUM_BANKS(ADDR_SURF_8_BANK));
2354                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2355                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2356                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2357                                 NUM_BANKS(ADDR_SURF_16_BANK));
2358                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2359                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2360                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2361                                 NUM_BANKS(ADDR_SURF_16_BANK));
2362                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2363                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2364                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2365                                  NUM_BANKS(ADDR_SURF_16_BANK));
2366                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2367                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2368                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2369                                  NUM_BANKS(ADDR_SURF_16_BANK));
2370                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2371                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2372                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2373                                  NUM_BANKS(ADDR_SURF_16_BANK));
2374                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2376                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2377                                  NUM_BANKS(ADDR_SURF_16_BANK));
2378                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2380                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381                                  NUM_BANKS(ADDR_SURF_8_BANK));
2382
2383                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2384                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2385                             reg_offset != 23)
2386                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2387
2388                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2389                         if (reg_offset != 7)
2390                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2391
2392                 break;
2393         default:
2394                 dev_warn(adev->dev,
2395                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2396                          adev->asic_type);
2397
2398         case CHIP_CARRIZO:
2399                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400                                 PIPE_CONFIG(ADDR_SURF_P2) |
2401                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2402                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2403                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404                                 PIPE_CONFIG(ADDR_SURF_P2) |
2405                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2406                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2408                                 PIPE_CONFIG(ADDR_SURF_P2) |
2409                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2410                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2411                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412                                 PIPE_CONFIG(ADDR_SURF_P2) |
2413                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2414                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2415                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2416                                 PIPE_CONFIG(ADDR_SURF_P2) |
2417                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2418                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2419                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2420                                 PIPE_CONFIG(ADDR_SURF_P2) |
2421                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2422                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2423                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424                                 PIPE_CONFIG(ADDR_SURF_P2) |
2425                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2426                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2427                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2428                                 PIPE_CONFIG(ADDR_SURF_P2));
2429                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430                                 PIPE_CONFIG(ADDR_SURF_P2) |
2431                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434                                  PIPE_CONFIG(ADDR_SURF_P2) |
2435                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2436                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438                                  PIPE_CONFIG(ADDR_SURF_P2) |
2439                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2440                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2442                                  PIPE_CONFIG(ADDR_SURF_P2) |
2443                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2444                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2445                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446                                  PIPE_CONFIG(ADDR_SURF_P2) |
2447                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2448                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2450                                  PIPE_CONFIG(ADDR_SURF_P2) |
2451                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2452                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2453                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2454                                  PIPE_CONFIG(ADDR_SURF_P2) |
2455                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2456                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2457                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2458                                  PIPE_CONFIG(ADDR_SURF_P2) |
2459                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2460                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2461                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2462                                  PIPE_CONFIG(ADDR_SURF_P2) |
2463                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2464                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2465                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2466                                  PIPE_CONFIG(ADDR_SURF_P2) |
2467                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2468                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2469                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2470                                  PIPE_CONFIG(ADDR_SURF_P2) |
2471                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2472                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2473                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2474                                  PIPE_CONFIG(ADDR_SURF_P2) |
2475                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2476                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2477                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2478                                  PIPE_CONFIG(ADDR_SURF_P2) |
2479                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2480                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2481                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2482                                  PIPE_CONFIG(ADDR_SURF_P2) |
2483                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2484                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2485                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2486                                  PIPE_CONFIG(ADDR_SURF_P2) |
2487                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2488                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2489                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2490                                  PIPE_CONFIG(ADDR_SURF_P2) |
2491                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2492                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2493                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494                                  PIPE_CONFIG(ADDR_SURF_P2) |
2495                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2496                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2497                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2498                                  PIPE_CONFIG(ADDR_SURF_P2) |
2499                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2500                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2501
2502                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2504                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505                                 NUM_BANKS(ADDR_SURF_8_BANK));
2506                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2508                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2509                                 NUM_BANKS(ADDR_SURF_8_BANK));
2510                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2513                                 NUM_BANKS(ADDR_SURF_8_BANK));
2514                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2517                                 NUM_BANKS(ADDR_SURF_8_BANK));
2518                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2520                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2521                                 NUM_BANKS(ADDR_SURF_8_BANK));
2522                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2525                                 NUM_BANKS(ADDR_SURF_8_BANK));
2526                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2528                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529                                 NUM_BANKS(ADDR_SURF_8_BANK));
2530                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2531                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2532                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2533                                 NUM_BANKS(ADDR_SURF_16_BANK));
2534                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2535                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2537                                 NUM_BANKS(ADDR_SURF_16_BANK));
2538                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2539                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2541                                  NUM_BANKS(ADDR_SURF_16_BANK));
2542                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2543                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2544                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2545                                  NUM_BANKS(ADDR_SURF_16_BANK));
2546                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2548                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2549                                  NUM_BANKS(ADDR_SURF_16_BANK));
2550                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2553                                  NUM_BANKS(ADDR_SURF_16_BANK));
2554                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2556                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2557                                  NUM_BANKS(ADDR_SURF_8_BANK));
2558
2559                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2560                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2561                             reg_offset != 23)
2562                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2563
2564                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2565                         if (reg_offset != 7)
2566                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2567
2568                 break;
2569         }
2570 }
2571
2572 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2573 {
2574         u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2575
2576         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2577                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2578                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2579         } else if (se_num == 0xffffffff) {
2580                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2581                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2582         } else if (sh_num == 0xffffffff) {
2583                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2584                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2585         } else {
2586                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2587                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2588         }
2589         WREG32(mmGRBM_GFX_INDEX, data);
2590 }
2591
2592 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2593 {
2594         return (u32)((1ULL << bit_width) - 1);
2595 }
2596
2597 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2598 {
2599         u32 data, mask;
2600
2601         data = RREG32(mmCC_RB_BACKEND_DISABLE);
2602         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2603
2604         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2605         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2606
2607         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
2608                                        adev->gfx.config.max_sh_per_se);
2609
2610         return (~data) & mask;
2611 }
2612
2613 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
2614 {
2615         int i, j;
2616         u32 data, tmp, num_rbs = 0;
2617         u32 active_rbs = 0;
2618
2619         mutex_lock(&adev->grbm_idx_mutex);
2620         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2621                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2622                         gfx_v8_0_select_se_sh(adev, i, j);
2623                         data = gfx_v8_0_get_rb_active_bitmap(adev);
2624                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2625                                                RB_BITMAP_WIDTH_PER_SH);
2626                 }
2627         }
2628         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2629         mutex_unlock(&adev->grbm_idx_mutex);
2630
2631         adev->gfx.config.backend_enable_mask = active_rbs;
2632         tmp = active_rbs;
2633         while (tmp >>= 1)
2634                 num_rbs++;
2635         adev->gfx.config.num_rbs = num_rbs;
2636 }
2637
2638 /**
2639  * gfx_v8_0_init_compute_vmid - gart enable
2640  *
2641  * @rdev: amdgpu_device pointer
2642  *
2643  * Initialize compute vmid sh_mem registers
2644  *
2645  */
2646 #define DEFAULT_SH_MEM_BASES    (0x6000)
2647 #define FIRST_COMPUTE_VMID      (8)
2648 #define LAST_COMPUTE_VMID       (16)
2649 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
2650 {
2651         int i;
2652         uint32_t sh_mem_config;
2653         uint32_t sh_mem_bases;
2654
2655         /*
2656          * Configure apertures:
2657          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2658          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2659          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2660          */
2661         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2662
2663         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
2664                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
2665                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2666                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
2667                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
2668                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
2669
2670         mutex_lock(&adev->srbm_mutex);
2671         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2672                 vi_srbm_select(adev, 0, 0, 0, i);
2673                 /* CP and shaders */
2674                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
2675                 WREG32(mmSH_MEM_APE1_BASE, 1);
2676                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2677                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
2678         }
2679         vi_srbm_select(adev, 0, 0, 0, 0);
2680         mutex_unlock(&adev->srbm_mutex);
2681 }
2682
2683 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2684 {
2685         u32 tmp;
2686         int i;
2687
2688         tmp = RREG32(mmGRBM_CNTL);
2689         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
2690         WREG32(mmGRBM_CNTL, tmp);
2691
2692         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2693         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2694         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2695
2696         gfx_v8_0_tiling_mode_table_init(adev);
2697
2698         gfx_v8_0_setup_rb(adev);
2699
2700         /* XXX SH_MEM regs */
2701         /* where to put LDS, scratch, GPUVM in FSA64 space */
2702         mutex_lock(&adev->srbm_mutex);
2703         for (i = 0; i < 16; i++) {
2704                 vi_srbm_select(adev, 0, 0, 0, i);
2705                 /* CP and shaders */
2706                 if (i == 0) {
2707                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2708                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
2709                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2710                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2711                         WREG32(mmSH_MEM_CONFIG, tmp);
2712                 } else {
2713                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2714                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
2715                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2716                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2717                         WREG32(mmSH_MEM_CONFIG, tmp);
2718                 }
2719
2720                 WREG32(mmSH_MEM_APE1_BASE, 1);
2721                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2722                 WREG32(mmSH_MEM_BASES, 0);
2723         }
2724         vi_srbm_select(adev, 0, 0, 0, 0);
2725         mutex_unlock(&adev->srbm_mutex);
2726
2727         gfx_v8_0_init_compute_vmid(adev);
2728
2729         mutex_lock(&adev->grbm_idx_mutex);
2730         /*
2731          * making sure that the following register writes will be broadcasted
2732          * to all the shaders
2733          */
2734         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2735
2736         WREG32(mmPA_SC_FIFO_SIZE,
2737                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
2738                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2739                    (adev->gfx.config.sc_prim_fifo_size_backend <<
2740                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2741                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
2742                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2743                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
2744                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
2745         mutex_unlock(&adev->grbm_idx_mutex);
2746
2747 }
2748
2749 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2750 {
2751         u32 i, j, k;
2752         u32 mask;
2753
2754         mutex_lock(&adev->grbm_idx_mutex);
2755         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2756                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2757                         gfx_v8_0_select_se_sh(adev, i, j);
2758                         for (k = 0; k < adev->usec_timeout; k++) {
2759                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2760                                         break;
2761                                 udelay(1);
2762                         }
2763                 }
2764         }
2765         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2766         mutex_unlock(&adev->grbm_idx_mutex);
2767
2768         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2769                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2770                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2771                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2772         for (k = 0; k < adev->usec_timeout; k++) {
2773                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2774                         break;
2775                 udelay(1);
2776         }
2777 }
2778
2779 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2780                                                bool enable)
2781 {
2782         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2783
2784         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2785         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2786         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2787         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2788
2789         WREG32(mmCP_INT_CNTL_RING0, tmp);
2790 }
2791
2792 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
2793 {
2794         u32 tmp = RREG32(mmRLC_CNTL);
2795
2796         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2797         WREG32(mmRLC_CNTL, tmp);
2798
2799         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2800
2801         gfx_v8_0_wait_for_rlc_serdes(adev);
2802 }
2803
2804 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
2805 {
2806         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
2807
2808         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2809         WREG32(mmGRBM_SOFT_RESET, tmp);
2810         udelay(50);
2811         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2812         WREG32(mmGRBM_SOFT_RESET, tmp);
2813         udelay(50);
2814 }
2815
2816 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2817 {
2818         u32 tmp = RREG32(mmRLC_CNTL);
2819
2820         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2821         WREG32(mmRLC_CNTL, tmp);
2822
2823         /* carrizo do enable cp interrupt after cp inited */
2824         if (!(adev->flags & AMD_IS_APU))
2825                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
2826
2827         udelay(50);
2828 }
2829
2830 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
2831 {
2832         const struct rlc_firmware_header_v2_0 *hdr;
2833         const __le32 *fw_data;
2834         unsigned i, fw_size;
2835
2836         if (!adev->gfx.rlc_fw)
2837                 return -EINVAL;
2838
2839         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2840         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2841
2842         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2843                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2844         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2845
2846         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
2847         for (i = 0; i < fw_size; i++)
2848                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2849         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2850
2851         return 0;
2852 }
2853
2854 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
2855 {
2856         int r;
2857
2858         gfx_v8_0_rlc_stop(adev);
2859
2860         /* disable CG */
2861         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
2862
2863         /* disable PG */
2864         WREG32(mmRLC_PG_CNTL, 0);
2865
2866         gfx_v8_0_rlc_reset(adev);
2867
2868         if (!adev->pp_enabled) {
2869                 if (!adev->firmware.smu_load) {
2870                         /* legacy rlc firmware loading */
2871                         r = gfx_v8_0_rlc_load_microcode(adev);
2872                         if (r)
2873                                 return r;
2874                 } else {
2875                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
2876                                                         AMDGPU_UCODE_ID_RLC_G);
2877                         if (r)
2878                                 return -EINVAL;
2879                 }
2880         }
2881
2882         gfx_v8_0_rlc_start(adev);
2883
2884         return 0;
2885 }
2886
2887 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2888 {
2889         int i;
2890         u32 tmp = RREG32(mmCP_ME_CNTL);
2891
2892         if (enable) {
2893                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2894                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2895                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2896         } else {
2897                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2898                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2899                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2900                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2901                         adev->gfx.gfx_ring[i].ready = false;
2902         }
2903         WREG32(mmCP_ME_CNTL, tmp);
2904         udelay(50);
2905 }
2906
2907 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2908 {
2909         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2910         const struct gfx_firmware_header_v1_0 *ce_hdr;
2911         const struct gfx_firmware_header_v1_0 *me_hdr;
2912         const __le32 *fw_data;
2913         unsigned i, fw_size;
2914
2915         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2916                 return -EINVAL;
2917
2918         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2919                 adev->gfx.pfp_fw->data;
2920         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2921                 adev->gfx.ce_fw->data;
2922         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2923                 adev->gfx.me_fw->data;
2924
2925         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2926         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2927         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2928
2929         gfx_v8_0_cp_gfx_enable(adev, false);
2930
2931         /* PFP */
2932         fw_data = (const __le32 *)
2933                 (adev->gfx.pfp_fw->data +
2934                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2935         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2936         WREG32(mmCP_PFP_UCODE_ADDR, 0);
2937         for (i = 0; i < fw_size; i++)
2938                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2939         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2940
2941         /* CE */
2942         fw_data = (const __le32 *)
2943                 (adev->gfx.ce_fw->data +
2944                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2945         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2946         WREG32(mmCP_CE_UCODE_ADDR, 0);
2947         for (i = 0; i < fw_size; i++)
2948                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2949         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2950
2951         /* ME */
2952         fw_data = (const __le32 *)
2953                 (adev->gfx.me_fw->data +
2954                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2955         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2956         WREG32(mmCP_ME_RAM_WADDR, 0);
2957         for (i = 0; i < fw_size; i++)
2958                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2959         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2960
2961         return 0;
2962 }
2963
2964 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
2965 {
2966         u32 count = 0;
2967         const struct cs_section_def *sect = NULL;
2968         const struct cs_extent_def *ext = NULL;
2969
2970         /* begin clear state */
2971         count += 2;
2972         /* context control state */
2973         count += 3;
2974
2975         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
2976                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2977                         if (sect->id == SECT_CONTEXT)
2978                                 count += 2 + ext->reg_count;
2979                         else
2980                                 return 0;
2981                 }
2982         }
2983         /* pa_sc_raster_config/pa_sc_raster_config1 */
2984         count += 4;
2985         /* end clear state */
2986         count += 2;
2987         /* clear state */
2988         count += 2;
2989
2990         return count;
2991 }
2992
2993 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
2994 {
2995         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2996         const struct cs_section_def *sect = NULL;
2997         const struct cs_extent_def *ext = NULL;
2998         int r, i;
2999
3000         /* init the CP */
3001         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3002         WREG32(mmCP_ENDIAN_SWAP, 0);
3003         WREG32(mmCP_DEVICE_ID, 1);
3004
3005         gfx_v8_0_cp_gfx_enable(adev, true);
3006
3007         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3008         if (r) {
3009                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3010                 return r;
3011         }
3012
3013         /* clear state buffer */
3014         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3015         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3016
3017         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3018         amdgpu_ring_write(ring, 0x80000000);
3019         amdgpu_ring_write(ring, 0x80000000);
3020
3021         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3022                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3023                         if (sect->id == SECT_CONTEXT) {
3024                                 amdgpu_ring_write(ring,
3025                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3026                                                ext->reg_count));
3027                                 amdgpu_ring_write(ring,
3028                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3029                                 for (i = 0; i < ext->reg_count; i++)
3030                                         amdgpu_ring_write(ring, ext->extent[i]);
3031                         }
3032                 }
3033         }
3034
3035         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3036         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3037         switch (adev->asic_type) {
3038         case CHIP_TONGA:
3039                 amdgpu_ring_write(ring, 0x16000012);
3040                 amdgpu_ring_write(ring, 0x0000002A);
3041                 break;
3042         case CHIP_FIJI:
3043                 amdgpu_ring_write(ring, 0x3a00161a);
3044                 amdgpu_ring_write(ring, 0x0000002e);
3045                 break;
3046         case CHIP_TOPAZ:
3047         case CHIP_CARRIZO:
3048                 amdgpu_ring_write(ring, 0x00000002);
3049                 amdgpu_ring_write(ring, 0x00000000);
3050                 break;
3051         case CHIP_STONEY:
3052                 amdgpu_ring_write(ring, 0x00000000);
3053                 amdgpu_ring_write(ring, 0x00000000);
3054                 break;
3055         default:
3056                 BUG();
3057         }
3058
3059         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3060         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3061
3062         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3063         amdgpu_ring_write(ring, 0);
3064
3065         /* init the CE partitions */
3066         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3067         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3068         amdgpu_ring_write(ring, 0x8000);
3069         amdgpu_ring_write(ring, 0x8000);
3070
3071         amdgpu_ring_commit(ring);
3072
3073         return 0;
3074 }
3075
3076 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
3077 {
3078         struct amdgpu_ring *ring;
3079         u32 tmp;
3080         u32 rb_bufsz;
3081         u64 rb_addr, rptr_addr;
3082         int r;
3083
3084         /* Set the write pointer delay */
3085         WREG32(mmCP_RB_WPTR_DELAY, 0);
3086
3087         /* set the RB to use vmid 0 */
3088         WREG32(mmCP_RB_VMID, 0);
3089
3090         /* Set ring buffer size */
3091         ring = &adev->gfx.gfx_ring[0];
3092         rb_bufsz = order_base_2(ring->ring_size / 8);
3093         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3094         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3095         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3096         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
3097 #ifdef __BIG_ENDIAN
3098         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3099 #endif
3100         WREG32(mmCP_RB0_CNTL, tmp);
3101
3102         /* Initialize the ring buffer's read and write pointers */
3103         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3104         ring->wptr = 0;
3105         WREG32(mmCP_RB0_WPTR, ring->wptr);
3106
3107         /* set the wb address wether it's enabled or not */
3108         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3109         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3110         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
3111
3112         mdelay(1);
3113         WREG32(mmCP_RB0_CNTL, tmp);
3114
3115         rb_addr = ring->gpu_addr >> 8;
3116         WREG32(mmCP_RB0_BASE, rb_addr);
3117         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3118
3119         /* no gfx doorbells on iceland */
3120         if (adev->asic_type != CHIP_TOPAZ) {
3121                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
3122                 if (ring->use_doorbell) {
3123                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3124                                             DOORBELL_OFFSET, ring->doorbell_index);
3125                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3126                                             DOORBELL_EN, 1);
3127                 } else {
3128                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3129                                             DOORBELL_EN, 0);
3130                 }
3131                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
3132
3133                 if (adev->asic_type == CHIP_TONGA) {
3134                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3135                                             DOORBELL_RANGE_LOWER,
3136                                             AMDGPU_DOORBELL_GFX_RING0);
3137                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3138
3139                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
3140                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3141                 }
3142
3143         }
3144
3145         /* start the ring */
3146         gfx_v8_0_cp_gfx_start(adev);
3147         ring->ready = true;
3148         r = amdgpu_ring_test_ring(ring);
3149         if (r) {
3150                 ring->ready = false;
3151                 return r;
3152         }
3153
3154         return 0;
3155 }
3156
3157 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3158 {
3159         int i;
3160
3161         if (enable) {
3162                 WREG32(mmCP_MEC_CNTL, 0);
3163         } else {
3164                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3165                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3166                         adev->gfx.compute_ring[i].ready = false;
3167         }
3168         udelay(50);
3169 }
3170
3171 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3172 {
3173         const struct gfx_firmware_header_v1_0 *mec_hdr;
3174         const __le32 *fw_data;
3175         unsigned i, fw_size;
3176
3177         if (!adev->gfx.mec_fw)
3178                 return -EINVAL;
3179
3180         gfx_v8_0_cp_compute_enable(adev, false);
3181
3182         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3183         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3184
3185         fw_data = (const __le32 *)
3186                 (adev->gfx.mec_fw->data +
3187                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3188         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3189
3190         /* MEC1 */
3191         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3192         for (i = 0; i < fw_size; i++)
3193                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3194         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3195
3196         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3197         if (adev->gfx.mec2_fw) {
3198                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
3199
3200                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3201                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3202
3203                 fw_data = (const __le32 *)
3204                         (adev->gfx.mec2_fw->data +
3205                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3206                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3207
3208                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3209                 for (i = 0; i < fw_size; i++)
3210                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3211                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3212         }
3213
3214         return 0;
3215 }
3216
/*
 * struct vi_mqd - layout of the per compute ring MQD buffer
 *
 * The structure consists of 256 named dwords (the trailing "ordinalN"
 * comment on each field gives its dword index, 0..255) followed by a
 * 256 dword area reserved for the microcode.  Every field is a uint32_t,
 * so the field order below is the memory layout.
 *
 * NOTE(review): MQD presumably stands for "memory queue descriptor" and
 * the cp_hqd_* fields presumably mirror the CP_HQD_* registers of the
 * same name - confirm against the hardware documentation.
 */
3217 struct vi_mqd {
3218         uint32_t header;  /* ordinal0 */
3219         uint32_t compute_dispatch_initiator;  /* ordinal1 */
3220         uint32_t compute_dim_x;  /* ordinal2 */
3221         uint32_t compute_dim_y;  /* ordinal3 */
3222         uint32_t compute_dim_z;  /* ordinal4 */
3223         uint32_t compute_start_x;  /* ordinal5 */
3224         uint32_t compute_start_y;  /* ordinal6 */
3225         uint32_t compute_start_z;  /* ordinal7 */
3226         uint32_t compute_num_thread_x;  /* ordinal8 */
3227         uint32_t compute_num_thread_y;  /* ordinal9 */
3228         uint32_t compute_num_thread_z;  /* ordinal10 */
3229         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
3230         uint32_t compute_perfcount_enable;  /* ordinal12 */
3231         uint32_t compute_pgm_lo;  /* ordinal13 */
3232         uint32_t compute_pgm_hi;  /* ordinal14 */
3233         uint32_t compute_tba_lo;  /* ordinal15 */
3234         uint32_t compute_tba_hi;  /* ordinal16 */
3235         uint32_t compute_tma_lo;  /* ordinal17 */
3236         uint32_t compute_tma_hi;  /* ordinal18 */
3237         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
3238         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
3239         uint32_t compute_vmid;  /* ordinal21 */
3240         uint32_t compute_resource_limits;  /* ordinal22 */
3241         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
3242         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
3243         uint32_t compute_tmpring_size;  /* ordinal25 */
3244         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
3245         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
3246         uint32_t compute_restart_x;  /* ordinal28 */
3247         uint32_t compute_restart_y;  /* ordinal29 */
3248         uint32_t compute_restart_z;  /* ordinal30 */
3249         uint32_t compute_thread_trace_enable;  /* ordinal31 */
3250         uint32_t compute_misc_reserved;  /* ordinal32 */
3251         uint32_t compute_dispatch_id;  /* ordinal33 */
3252         uint32_t compute_threadgroup_id;  /* ordinal34 */
3253         uint32_t compute_relaunch;  /* ordinal35 */
3254         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
3255         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
3256         uint32_t compute_wave_restore_control;  /* ordinal38 */
3257         uint32_t reserved9;  /* ordinal39 */
3258         uint32_t reserved10;  /* ordinal40 */
3259         uint32_t reserved11;  /* ordinal41 */
3260         uint32_t reserved12;  /* ordinal42 */
3261         uint32_t reserved13;  /* ordinal43 */
3262         uint32_t reserved14;  /* ordinal44 */
3263         uint32_t reserved15;  /* ordinal45 */
3264         uint32_t reserved16;  /* ordinal46 */
3265         uint32_t reserved17;  /* ordinal47 */
3266         uint32_t reserved18;  /* ordinal48 */
3267         uint32_t reserved19;  /* ordinal49 */
3268         uint32_t reserved20;  /* ordinal50 */
3269         uint32_t reserved21;  /* ordinal51 */
3270         uint32_t reserved22;  /* ordinal52 */
3271         uint32_t reserved23;  /* ordinal53 */
3272         uint32_t reserved24;  /* ordinal54 */
3273         uint32_t reserved25;  /* ordinal55 */
3274         uint32_t reserved26;  /* ordinal56 */
3275         uint32_t reserved27;  /* ordinal57 */
3276         uint32_t reserved28;  /* ordinal58 */
3277         uint32_t reserved29;  /* ordinal59 */
3278         uint32_t reserved30;  /* ordinal60 */
3279         uint32_t reserved31;  /* ordinal61 */
3280         uint32_t reserved32;  /* ordinal62 */
3281         uint32_t reserved33;  /* ordinal63 */
3282         uint32_t reserved34;  /* ordinal64 */
3283         uint32_t compute_user_data_0;  /* ordinal65 */
3284         uint32_t compute_user_data_1;  /* ordinal66 */
3285         uint32_t compute_user_data_2;  /* ordinal67 */
3286         uint32_t compute_user_data_3;  /* ordinal68 */
3287         uint32_t compute_user_data_4;  /* ordinal69 */
3288         uint32_t compute_user_data_5;  /* ordinal70 */
3289         uint32_t compute_user_data_6;  /* ordinal71 */
3290         uint32_t compute_user_data_7;  /* ordinal72 */
3291         uint32_t compute_user_data_8;  /* ordinal73 */
3292         uint32_t compute_user_data_9;  /* ordinal74 */
3293         uint32_t compute_user_data_10;  /* ordinal75 */
3294         uint32_t compute_user_data_11;  /* ordinal76 */
3295         uint32_t compute_user_data_12;  /* ordinal77 */
3296         uint32_t compute_user_data_13;  /* ordinal78 */
3297         uint32_t compute_user_data_14;  /* ordinal79 */
3298         uint32_t compute_user_data_15;  /* ordinal80 */
3299         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
3300         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
3301         uint32_t reserved35;  /* ordinal83 */
3302         uint32_t reserved36;  /* ordinal84 */
3303         uint32_t reserved37;  /* ordinal85 */
3304         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
3305         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
3306         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
3307         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
3308         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
3309         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
3310         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
3311         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
3312         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
3313         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
3314         uint32_t reserved38;  /* ordinal96 */
3315         uint32_t reserved39;  /* ordinal97 */
3316         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
3317         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
3318         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
3319         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
3320         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
3321         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
3322         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
3323         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
3324         uint32_t reserved40;  /* ordinal106 */
3325         uint32_t reserved41;  /* ordinal107 */
3326         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
3327         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
3328         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
3329         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
3330         uint32_t reserved42;  /* ordinal112 */
3331         uint32_t reserved43;  /* ordinal113 */
3332         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
3333         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
3334         uint32_t cp_packet_id_lo;  /* ordinal116 */
3335         uint32_t cp_packet_id_hi;  /* ordinal117 */
3336         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
3337         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
3338         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
3339         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
3340         uint32_t gds_save_mask_lo;  /* ordinal122 */
3341         uint32_t gds_save_mask_hi;  /* ordinal123 */
3342         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
3343         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
3344         uint32_t reserved44;  /* ordinal126 */
3345         uint32_t reserved45;  /* ordinal127 */
3346         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
3347         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
3348         uint32_t cp_hqd_active;  /* ordinal130 */
3349         uint32_t cp_hqd_vmid;  /* ordinal131 */
3350         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
3351         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
3352         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
3353         uint32_t cp_hqd_quantum;  /* ordinal135 */
3354         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
3355         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
3356         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
3357         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
3358         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
3359         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
3360         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
3361         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
3362         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
3363         uint32_t cp_hqd_pq_control;  /* ordinal145 */
3364         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
3365         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
3366         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
3367         uint32_t cp_hqd_ib_control;  /* ordinal149 */
3368         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
3369         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
3370         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
3371         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
3372         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
3373         uint32_t cp_hqd_msg_type;  /* ordinal155 */
3374         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
3375         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
3376         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
3377         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
3378         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
3379         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
3380         uint32_t cp_mqd_control;  /* ordinal162 */
3381         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
3382         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
3383         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
3384         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
3385         uint32_t cp_hqd_eop_control;  /* ordinal167 */
3386         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
3387         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
3388         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
3389         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
3390         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
3391         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
3392         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
3393         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
3394         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
3395         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
3396         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
3397         uint32_t cp_hqd_error;  /* ordinal179 */
3398         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
3399         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
3400         uint32_t reserved46;  /* ordinal182 */
3401         uint32_t reserved47;  /* ordinal183 */
3402         uint32_t reserved48;  /* ordinal184 */
3403         uint32_t reserved49;  /* ordinal185 */
3404         uint32_t reserved50;  /* ordinal186 */
3405         uint32_t reserved51;  /* ordinal187 */
3406         uint32_t reserved52;  /* ordinal188 */
3407         uint32_t reserved53;  /* ordinal189 */
3408         uint32_t reserved54;  /* ordinal190 */
3409         uint32_t reserved55;  /* ordinal191 */
3410         uint32_t iqtimer_pkt_header;  /* ordinal192 */
3411         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
3412         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
3413         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
3414         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
3415         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
3416         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
3417         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
3418         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
3419         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
3420         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
3421         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
3422         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
3423         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
3424         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
3425         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
3426         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
3427         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
3428         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
3429         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
3430         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
3431         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
3432         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
3433         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
3434         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
3435         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
3436         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
3437         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
3438         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
3439         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
3440         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
3441         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
3442         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
3443         uint32_t reserved56;  /* ordinal225 */
3444         uint32_t reserved57;  /* ordinal226 */
3445         uint32_t reserved58;  /* ordinal227 */
3446         uint32_t set_resources_header;  /* ordinal228 */
3447         uint32_t set_resources_dw1;  /* ordinal229 */
3448         uint32_t set_resources_dw2;  /* ordinal230 */
3449         uint32_t set_resources_dw3;  /* ordinal231 */
3450         uint32_t set_resources_dw4;  /* ordinal232 */
3451         uint32_t set_resources_dw5;  /* ordinal233 */
3452         uint32_t set_resources_dw6;  /* ordinal234 */
3453         uint32_t set_resources_dw7;  /* ordinal235 */
3454         uint32_t reserved59;  /* ordinal236 */
3455         uint32_t reserved60;  /* ordinal237 */
3456         uint32_t reserved61;  /* ordinal238 */
3457         uint32_t reserved62;  /* ordinal239 */
3458         uint32_t reserved63;  /* ordinal240 */
3459         uint32_t reserved64;  /* ordinal241 */
3460         uint32_t reserved65;  /* ordinal242 */
3461         uint32_t reserved66;  /* ordinal243 */
3462         uint32_t reserved67;  /* ordinal244 */
3463         uint32_t reserved68;  /* ordinal245 */
3464         uint32_t reserved69;  /* ordinal246 */
3465         uint32_t reserved70;  /* ordinal247 */
3466         uint32_t reserved71;  /* ordinal248 */
3467         uint32_t reserved72;  /* ordinal249 */
3468         uint32_t reserved73;  /* ordinal250 */
3469         uint32_t reserved74;  /* ordinal251 */
3470         uint32_t reserved75;  /* ordinal252 */
3471         uint32_t reserved76;  /* ordinal253 */
3472         uint32_t reserved77;  /* ordinal254 */
3473         uint32_t reserved78;  /* ordinal255 */
3474
3475         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
3476 };
3477
3478 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3479 {
3480         int i, r;
3481
3482         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3483                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3484
3485                 if (ring->mqd_obj) {
3486                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3487                         if (unlikely(r != 0))
3488                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3489
3490                         amdgpu_bo_unpin(ring->mqd_obj);
3491                         amdgpu_bo_unreserve(ring->mqd_obj);
3492
3493                         amdgpu_bo_unref(&ring->mqd_obj);
3494                         ring->mqd_obj = NULL;
3495                 }
3496         }
3497 }
3498
3499 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
3500 {
3501         int r, i, j;
3502         u32 tmp;
3503         bool use_doorbell = true;
3504         u64 hqd_gpu_addr;
3505         u64 mqd_gpu_addr;
3506         u64 eop_gpu_addr;
3507         u64 wb_gpu_addr;
3508         u32 *buf;
3509         struct vi_mqd *mqd;
3510
3511         /* init the pipes */
3512         mutex_lock(&adev->srbm_mutex);
3513         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
3514                 int me = (i < 4) ? 1 : 2;
3515                 int pipe = (i < 4) ? i : (i - 4);
3516
3517                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
3518                 eop_gpu_addr >>= 8;
3519
3520                 vi_srbm_select(adev, me, pipe, 0, 0);
3521
3522                 /* write the EOP addr */
3523                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
3524                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
3525
3526                 /* set the VMID assigned */
3527                 WREG32(mmCP_HQD_VMID, 0);
3528
3529                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3530                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
3531                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3532                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
3533                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
3534         }
3535         vi_srbm_select(adev, 0, 0, 0, 0);
3536         mutex_unlock(&adev->srbm_mutex);
3537
3538         /* init the queues.  Just two for now. */
3539         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3540                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3541
3542                 if (ring->mqd_obj == NULL) {
3543                         r = amdgpu_bo_create(adev,
3544                                              sizeof(struct vi_mqd),
3545                                              PAGE_SIZE, true,
3546                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
3547                                              NULL, &ring->mqd_obj);
3548                         if (r) {
3549                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3550                                 return r;
3551                         }
3552                 }
3553
3554                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3555                 if (unlikely(r != 0)) {
3556                         gfx_v8_0_cp_compute_fini(adev);
3557                         return r;
3558                 }
3559                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3560                                   &mqd_gpu_addr);
3561                 if (r) {
3562                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3563                         gfx_v8_0_cp_compute_fini(adev);
3564                         return r;
3565                 }
3566                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3567                 if (r) {
3568                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3569                         gfx_v8_0_cp_compute_fini(adev);
3570                         return r;
3571                 }
3572
3573                 /* init the mqd struct */
3574                 memset(buf, 0, sizeof(struct vi_mqd));
3575
3576                 mqd = (struct vi_mqd *)buf;
3577                 mqd->header = 0xC0310800;
3578                 mqd->compute_pipelinestat_enable = 0x00000001;
3579                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3580                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3581                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3582                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3583                 mqd->compute_misc_reserved = 0x00000003;
3584
3585                 mutex_lock(&adev->srbm_mutex);
3586                 vi_srbm_select(adev, ring->me,
3587                                ring->pipe,
3588                                ring->queue, 0);
3589
3590                 /* disable wptr polling */
3591                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3592                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3593                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3594
3595                 mqd->cp_hqd_eop_base_addr_lo =
3596                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
3597                 mqd->cp_hqd_eop_base_addr_hi =
3598                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
3599
3600                 /* enable doorbell? */
3601                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3602                 if (use_doorbell) {
3603                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3604                 } else {
3605                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
3606                 }
3607                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
3608                 mqd->cp_hqd_pq_doorbell_control = tmp;
3609
3610                 /* disable the queue if it's active */
3611                 mqd->cp_hqd_dequeue_request = 0;
3612                 mqd->cp_hqd_pq_rptr = 0;
3613                 mqd->cp_hqd_pq_wptr= 0;
3614                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
3615                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
3616                         for (j = 0; j < adev->usec_timeout; j++) {
3617                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
3618                                         break;
3619                                 udelay(1);
3620                         }
3621                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
3622                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
3623                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3624                 }
3625
3626                 /* set the pointer to the MQD */
3627                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
3628                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3629                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
3630                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3631
3632                 /* set MQD vmid to 0 */
3633                 tmp = RREG32(mmCP_MQD_CONTROL);
3634                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3635                 WREG32(mmCP_MQD_CONTROL, tmp);
3636                 mqd->cp_mqd_control = tmp;
3637
3638                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3639                 hqd_gpu_addr = ring->gpu_addr >> 8;
3640                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3641                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3642                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
3643                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
3644
3645                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3646                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
3647                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3648                                     (order_base_2(ring->ring_size / 4) - 1));
3649                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3650                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3651 #ifdef __BIG_ENDIAN
3652                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3653 #endif
3654                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3655                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3656                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3657                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3658                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
3659                 mqd->cp_hqd_pq_control = tmp;
3660
3661                 /* set the wb address wether it's enabled or not */
3662                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3663                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3664                 mqd->cp_hqd_pq_rptr_report_addr_hi =
3665                         upper_32_bits(wb_gpu_addr) & 0xffff;
3666                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3667                        mqd->cp_hqd_pq_rptr_report_addr_lo);
3668                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3669                        mqd->cp_hqd_pq_rptr_report_addr_hi);
3670
3671                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3672                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3673                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3674                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3675                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
3676                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3677                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
3678
3679                 /* enable the doorbell if requested */
3680                 if (use_doorbell) {
3681                         if ((adev->asic_type == CHIP_CARRIZO) ||
3682                             (adev->asic_type == CHIP_FIJI) ||
3683                             (adev->asic_type == CHIP_STONEY)) {
3684                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
3685                                        AMDGPU_DOORBELL_KIQ << 2);
3686                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
3687                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
3688                         }
3689                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3690                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3691                                             DOORBELL_OFFSET, ring->doorbell_index);
3692                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3693                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
3694                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
3695                         mqd->cp_hqd_pq_doorbell_control = tmp;
3696
3697                 } else {
3698                         mqd->cp_hqd_pq_doorbell_control = 0;
3699                 }
3700                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3701                        mqd->cp_hqd_pq_doorbell_control);
3702
3703                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3704                 ring->wptr = 0;
3705                 mqd->cp_hqd_pq_wptr = ring->wptr;
3706                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3707                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3708
3709                 /* set the vmid for the queue */
3710                 mqd->cp_hqd_vmid = 0;
3711                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3712
3713                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
3714                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3715                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
3716                 mqd->cp_hqd_persistent_state = tmp;
3717                 if (adev->asic_type == CHIP_STONEY) {
3718                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
3719                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
3720                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
3721                 }
3722
3723                 /* activate the queue */
3724                 mqd->cp_hqd_active = 1;
3725                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
3726
3727                 vi_srbm_select(adev, 0, 0, 0, 0);
3728                 mutex_unlock(&adev->srbm_mutex);
3729
3730                 amdgpu_bo_kunmap(ring->mqd_obj);
3731                 amdgpu_bo_unreserve(ring->mqd_obj);
3732         }
3733
3734         if (use_doorbell) {
3735                 tmp = RREG32(mmCP_PQ_STATUS);
3736                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3737                 WREG32(mmCP_PQ_STATUS, tmp);
3738         }
3739
3740         gfx_v8_0_cp_compute_enable(adev, true);
3741
3742         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3743                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3744
3745                 ring->ready = true;
3746                 r = amdgpu_ring_test_ring(ring);
3747                 if (r)
3748                         ring->ready = false;
3749         }
3750
3751         return 0;
3752 }
3753
3754 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3755 {
3756         int r;
3757
3758         if (!(adev->flags & AMD_IS_APU))
3759                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3760
3761         if (!adev->pp_enabled) {
3762                 if (!adev->firmware.smu_load) {
3763                         /* legacy firmware loading */
3764                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
3765                         if (r)
3766                                 return r;
3767
3768                         r = gfx_v8_0_cp_compute_load_microcode(adev);
3769                         if (r)
3770                                 return r;
3771                 } else {
3772                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3773                                                         AMDGPU_UCODE_ID_CP_CE);
3774                         if (r)
3775                                 return -EINVAL;
3776
3777                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3778                                                         AMDGPU_UCODE_ID_CP_PFP);
3779                         if (r)
3780                                 return -EINVAL;
3781
3782                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3783                                                         AMDGPU_UCODE_ID_CP_ME);
3784                         if (r)
3785                                 return -EINVAL;
3786
3787                         if (adev->asic_type == CHIP_TOPAZ) {
3788                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
3789                                 if (r)
3790                                         return r;
3791                         } else {
3792                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3793                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
3794                                 if (r)
3795                                         return -EINVAL;
3796                         }
3797                 }
3798         }
3799
3800         r = gfx_v8_0_cp_gfx_resume(adev);
3801         if (r)
3802                 return r;
3803
3804         r = gfx_v8_0_cp_compute_resume(adev);
3805         if (r)
3806                 return r;
3807
3808         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3809
3810         return 0;
3811 }
3812
3813 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
3814 {
3815         gfx_v8_0_cp_gfx_enable(adev, enable);
3816         gfx_v8_0_cp_compute_enable(adev, enable);
3817 }
3818
/**
 * gfx_v8_0_hw_init - bring up the GFX block hardware
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Programs the golden registers, runs the GPU init, then resumes the RLC
 * followed by the CP.
 *
 * Returns 0 on success or the error code of the first failing resume step.
 */
static int gfx_v8_0_hw_init(void *handle)
{
        struct amdgpu_device *adev = handle;
        int ret;

        gfx_v8_0_init_golden_registers(adev);
        gfx_v8_0_gpu_init(adev);

        ret = gfx_v8_0_rlc_resume(adev);
        if (ret)
                return ret;

        /* the CP is resumed only after the RLC came up */
        return gfx_v8_0_cp_resume(adev);
}
3838
3839 static int gfx_v8_0_hw_fini(void *handle)
3840 {
3841         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3842
3843         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3844         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3845         gfx_v8_0_cp_enable(adev, false);
3846         gfx_v8_0_rlc_stop(adev);
3847         gfx_v8_0_cp_compute_fini(adev);
3848
3849         return 0;
3850 }
3851
/**
 * gfx_v8_0_suspend - suspend the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Suspend is implemented as a plain hardware teardown.
 *
 * Returns the result of gfx_v8_0_hw_fini().
 */
static int gfx_v8_0_suspend(void *handle)
{
        /* the handle is the amdgpu_device; hand it on unchanged */
        return gfx_v8_0_hw_fini(handle);
}
3858
/**
 * gfx_v8_0_resume - resume the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Resume is implemented as a full hardware init.
 *
 * Returns the result of gfx_v8_0_hw_init().
 */
static int gfx_v8_0_resume(void *handle)
{
        /* the handle is the amdgpu_device; hand it on unchanged */
        return gfx_v8_0_hw_init(handle);
}
3865
3866 static bool gfx_v8_0_is_idle(void *handle)
3867 {
3868         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3869
3870         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3871                 return false;
3872         else
3873                 return true;
3874 }
3875
3876 static int gfx_v8_0_wait_for_idle(void *handle)
3877 {
3878         unsigned i;
3879         u32 tmp;
3880         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3881
3882         for (i = 0; i < adev->usec_timeout; i++) {
3883                 /* read MC_STATUS */
3884                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3885
3886                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3887                         return 0;
3888                 udelay(1);
3889         }
3890         return -ETIMEDOUT;
3891 }
3892
3893 static void gfx_v8_0_print_status(void *handle)
3894 {
3895         int i;
3896         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3897
3898         dev_info(adev->dev, "GFX 8.x registers\n");
3899         dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
3900                  RREG32(mmGRBM_STATUS));
3901         dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
3902                  RREG32(mmGRBM_STATUS2));
3903         dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3904                  RREG32(mmGRBM_STATUS_SE0));
3905         dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3906                  RREG32(mmGRBM_STATUS_SE1));
3907         dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3908                  RREG32(mmGRBM_STATUS_SE2));
3909         dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3910                  RREG32(mmGRBM_STATUS_SE3));
3911         dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3912         dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3913                  RREG32(mmCP_STALLED_STAT1));
3914         dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3915                  RREG32(mmCP_STALLED_STAT2));
3916         dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3917                  RREG32(mmCP_STALLED_STAT3));
3918         dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3919                  RREG32(mmCP_CPF_BUSY_STAT));
3920         dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3921                  RREG32(mmCP_CPF_STALLED_STAT1));
3922         dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3923         dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3924         dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3925                  RREG32(mmCP_CPC_STALLED_STAT1));
3926         dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
3927
3928         for (i = 0; i < 32; i++) {
3929                 dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
3930                          i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3931         }
3932         for (i = 0; i < 16; i++) {
3933                 dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
3934                          i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
3935         }
3936         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3937                 dev_info(adev->dev, "  se: %d\n", i);
3938                 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3939                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
3940                          RREG32(mmPA_SC_RASTER_CONFIG));
3941                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
3942                          RREG32(mmPA_SC_RASTER_CONFIG_1));
3943         }
3944         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3945
3946         dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
3947                  RREG32(mmGB_ADDR_CONFIG));
3948         dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
3949                  RREG32(mmHDP_ADDR_CONFIG));
3950         dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
3951                  RREG32(mmDMIF_ADDR_CALC));
3952
3953         dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
3954                  RREG32(mmCP_MEQ_THRESHOLDS));
3955         dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
3956                  RREG32(mmSX_DEBUG_1));
3957         dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
3958                  RREG32(mmTA_CNTL_AUX));
3959         dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
3960                  RREG32(mmSPI_CONFIG_CNTL));
3961         dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
3962                  RREG32(mmSQ_CONFIG));
3963         dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
3964                  RREG32(mmDB_DEBUG));
3965         dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
3966                  RREG32(mmDB_DEBUG2));
3967         dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
3968                  RREG32(mmDB_DEBUG3));
3969         dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
3970                  RREG32(mmCB_HW_CONTROL));
3971         dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
3972                  RREG32(mmSPI_CONFIG_CNTL_1));
3973         dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
3974                  RREG32(mmPA_SC_FIFO_SIZE));
3975         dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
3976                  RREG32(mmVGT_NUM_INSTANCES));
3977         dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
3978                  RREG32(mmCP_PERFMON_CNTL));
3979         dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
3980                  RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
3981         dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
3982                  RREG32(mmVGT_CACHE_INVALIDATION));
3983         dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
3984                  RREG32(mmVGT_GS_VERTEX_REUSE));
3985         dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
3986                  RREG32(mmPA_SC_LINE_STIPPLE_STATE));
3987         dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
3988                  RREG32(mmPA_CL_ENHANCE));
3989         dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
3990                  RREG32(mmPA_SC_ENHANCE));
3991
3992         dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
3993                  RREG32(mmCP_ME_CNTL));
3994         dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
3995                  RREG32(mmCP_MAX_CONTEXT));
3996         dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
3997                  RREG32(mmCP_ENDIAN_SWAP));
3998         dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
3999                  RREG32(mmCP_DEVICE_ID));
4000
4001         dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
4002                  RREG32(mmCP_SEM_WAIT_TIMER));
4003
4004         dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
4005                  RREG32(mmCP_RB_WPTR_DELAY));
4006         dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
4007                  RREG32(mmCP_RB_VMID));
4008         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4009                  RREG32(mmCP_RB0_CNTL));
4010         dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
4011                  RREG32(mmCP_RB0_WPTR));
4012         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
4013                  RREG32(mmCP_RB0_RPTR_ADDR));
4014         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4015                  RREG32(mmCP_RB0_RPTR_ADDR_HI));
4016         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4017                  RREG32(mmCP_RB0_CNTL));
4018         dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
4019                  RREG32(mmCP_RB0_BASE));
4020         dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
4021                  RREG32(mmCP_RB0_BASE_HI));
4022         dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
4023                  RREG32(mmCP_MEC_CNTL));
4024         dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
4025                  RREG32(mmCP_CPF_DEBUG));
4026
4027         dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
4028                  RREG32(mmSCRATCH_ADDR));
4029         dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
4030                  RREG32(mmSCRATCH_UMSK));
4031
4032         dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
4033                  RREG32(mmCP_INT_CNTL_RING0));
4034         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4035                  RREG32(mmRLC_LB_CNTL));
4036         dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
4037                  RREG32(mmRLC_CNTL));
4038         dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
4039                  RREG32(mmRLC_CGCG_CGLS_CTRL));
4040         dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
4041                  RREG32(mmRLC_LB_CNTR_INIT));
4042         dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
4043                  RREG32(mmRLC_LB_CNTR_MAX));
4044         dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
4045                  RREG32(mmRLC_LB_INIT_CU_MASK));
4046         dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
4047                  RREG32(mmRLC_LB_PARAMS));
4048         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4049                  RREG32(mmRLC_LB_CNTL));
4050         dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
4051                  RREG32(mmRLC_MC_CNTL));
4052         dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
4053                  RREG32(mmRLC_UCODE_CNTL));
4054
4055         mutex_lock(&adev->srbm_mutex);
4056         for (i = 0; i < 16; i++) {
4057                 vi_srbm_select(adev, 0, 0, 0, i);
4058                 dev_info(adev->dev, "  VM %d:\n", i);
4059                 dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
4060                          RREG32(mmSH_MEM_CONFIG));
4061                 dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
4062                          RREG32(mmSH_MEM_APE1_BASE));
4063                 dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
4064                          RREG32(mmSH_MEM_APE1_LIMIT));
4065                 dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
4066                          RREG32(mmSH_MEM_BASES));
4067         }
4068         vi_srbm_select(adev, 0, 0, 0, 0);
4069         mutex_unlock(&adev->srbm_mutex);
4070 }
4071
4072 static int gfx_v8_0_soft_reset(void *handle)
4073 {
4074         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4075         u32 tmp;
4076         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4077
4078         /* GRBM_STATUS */
4079         tmp = RREG32(mmGRBM_STATUS);
4080         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4081                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4082                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4083                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4084                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4085                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4086                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4087                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4088                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4089                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4090         }
4091
4092         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4093                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4094                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4095                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4096                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4097         }
4098
4099         /* GRBM_STATUS2 */
4100         tmp = RREG32(mmGRBM_STATUS2);
4101         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4102                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4103                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4104
4105         /* SRBM_STATUS */
4106         tmp = RREG32(mmSRBM_STATUS);
4107         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4108                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4109                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4110
4111         if (grbm_soft_reset || srbm_soft_reset) {
4112                 gfx_v8_0_print_status((void *)adev);
4113                 /* stop the rlc */
4114                 gfx_v8_0_rlc_stop(adev);
4115
4116                 /* Disable GFX parsing/prefetching */
4117                 gfx_v8_0_cp_gfx_enable(adev, false);
4118
4119                 /* Disable MEC parsing/prefetching */
4120                 gfx_v8_0_cp_compute_enable(adev, false);
4121
4122                 if (grbm_soft_reset || srbm_soft_reset) {
4123                         tmp = RREG32(mmGMCON_DEBUG);
4124                         tmp = REG_SET_FIELD(tmp,
4125                                             GMCON_DEBUG, GFX_STALL, 1);
4126                         tmp = REG_SET_FIELD(tmp,
4127                                             GMCON_DEBUG, GFX_CLEAR, 1);
4128                         WREG32(mmGMCON_DEBUG, tmp);
4129
4130                         udelay(50);
4131                 }
4132
4133                 if (grbm_soft_reset) {
4134                         tmp = RREG32(mmGRBM_SOFT_RESET);
4135                         tmp |= grbm_soft_reset;
4136                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4137                         WREG32(mmGRBM_SOFT_RESET, tmp);
4138                         tmp = RREG32(mmGRBM_SOFT_RESET);
4139
4140                         udelay(50);
4141
4142                         tmp &= ~grbm_soft_reset;
4143                         WREG32(mmGRBM_SOFT_RESET, tmp);
4144                         tmp = RREG32(mmGRBM_SOFT_RESET);
4145                 }
4146
4147                 if (srbm_soft_reset) {
4148                         tmp = RREG32(mmSRBM_SOFT_RESET);
4149                         tmp |= srbm_soft_reset;
4150                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4151                         WREG32(mmSRBM_SOFT_RESET, tmp);
4152                         tmp = RREG32(mmSRBM_SOFT_RESET);
4153
4154                         udelay(50);
4155
4156                         tmp &= ~srbm_soft_reset;
4157                         WREG32(mmSRBM_SOFT_RESET, tmp);
4158                         tmp = RREG32(mmSRBM_SOFT_RESET);
4159                 }
4160
4161                 if (grbm_soft_reset || srbm_soft_reset) {
4162                         tmp = RREG32(mmGMCON_DEBUG);
4163                         tmp = REG_SET_FIELD(tmp,
4164                                             GMCON_DEBUG, GFX_STALL, 0);
4165                         tmp = REG_SET_FIELD(tmp,
4166                                             GMCON_DEBUG, GFX_CLEAR, 0);
4167                         WREG32(mmGMCON_DEBUG, tmp);
4168                 }
4169
4170                 /* Wait a little for things to settle down */
4171                 udelay(50);
4172                 gfx_v8_0_print_status((void *)adev);
4173         }
4174         return 0;
4175 }
4176
4177 /**
4178  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4179  *
4180  * @adev: amdgpu_device pointer
4181  *
4182  * Fetches a GPU clock counter snapshot.
4183  * Returns the 64 bit clock counter snapshot.
4184  */
4185 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4186 {
4187         uint64_t clock;
4188
4189         mutex_lock(&adev->gfx.gpu_clock_mutex);
4190         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4191         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4192                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4193         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4194         return clock;
4195 }
4196
4197 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4198                                           uint32_t vmid,
4199                                           uint32_t gds_base, uint32_t gds_size,
4200                                           uint32_t gws_base, uint32_t gws_size,
4201                                           uint32_t oa_base, uint32_t oa_size)
4202 {
4203         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4204         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4205
4206         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4207         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4208
4209         oa_base = oa_base >> AMDGPU_OA_SHIFT;
4210         oa_size = oa_size >> AMDGPU_OA_SHIFT;
4211
4212         /* GDS Base */
4213         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4214         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4215                                 WRITE_DATA_DST_SEL(0)));
4216         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4217         amdgpu_ring_write(ring, 0);
4218         amdgpu_ring_write(ring, gds_base);
4219
4220         /* GDS Size */
4221         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4222         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4223                                 WRITE_DATA_DST_SEL(0)));
4224         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4225         amdgpu_ring_write(ring, 0);
4226         amdgpu_ring_write(ring, gds_size);
4227
4228         /* GWS */
4229         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4230         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4231                                 WRITE_DATA_DST_SEL(0)));
4232         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4233         amdgpu_ring_write(ring, 0);
4234         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4235
4236         /* OA */
4237         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4238         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4239                                 WRITE_DATA_DST_SEL(0)));
4240         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4241         amdgpu_ring_write(ring, 0);
4242         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4243 }
4244
4245 static int gfx_v8_0_early_init(void *handle)
4246 {
4247         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4248
4249         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4250         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
4251         gfx_v8_0_set_ring_funcs(adev);
4252         gfx_v8_0_set_irq_funcs(adev);
4253         gfx_v8_0_set_gds_init(adev);
4254
4255         return 0;
4256 }
4257
4258 static int gfx_v8_0_late_init(void *handle)
4259 {
4260         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4261         int r;
4262
4263         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4264         if (r)
4265                 return r;
4266
4267         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4268         if (r)
4269                 return r;
4270
4271         /* requires IBs so do in late init after IB pool is initialized */
4272         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
4273         if (r)
4274                 return r;
4275
4276         return 0;
4277 }
4278
4279 static int gfx_v8_0_set_powergating_state(void *handle,
4280                                           enum amd_powergating_state state)
4281 {
4282         return 0;
4283 }
4284
4285 static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
4286                 uint32_t reg_addr, uint32_t cmd)
4287 {
4288         uint32_t data;
4289
4290         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4291
4292         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4293         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4294
4295         data = RREG32(mmRLC_SERDES_WR_CTRL);
4296         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
4297                         RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
4298                         RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
4299                         RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
4300                         RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
4301                         RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
4302                         RLC_SERDES_WR_CTRL__POWER_UP_MASK |
4303                         RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
4304                         RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
4305                         RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
4306                         RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
4307         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
4308                         (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
4309                         (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
4310                         (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
4311
4312         WREG32(mmRLC_SERDES_WR_CTRL, data);
4313 }
4314
4315 static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4316                 bool enable)
4317 {
4318         uint32_t temp, data;
4319
4320         /* It is disabled by HW by default */
4321         if (enable) {
4322                 /* 1 - RLC memory Light sleep */
4323                 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
4324                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4325                 if (temp != data)
4326                         WREG32(mmRLC_MEM_SLP_CNTL, data);
4327
4328                 /* 2 - CP memory Light sleep */
4329                 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
4330                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4331                 if (temp != data)
4332                         WREG32(mmCP_MEM_SLP_CNTL, data);
4333
4334                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
4335                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4336                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4337                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4338                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4339                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4340
4341                 if (temp != data)
4342                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4343
4344                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4345                 gfx_v8_0_wait_for_rlc_serdes(adev);
4346
4347                 /* 5 - clear mgcg override */
4348                 fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4349
4350                 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
4351                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4352                 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
4353                 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
4354                 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
4355                 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
4356                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
4357                 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
4358                 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
4359                 if (temp != data)
4360                         WREG32(mmCGTS_SM_CTRL_REG, data);
4361                 udelay(50);
4362
4363                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4364                 gfx_v8_0_wait_for_rlc_serdes(adev);
4365         } else {
4366                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
4367                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4368                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4369                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4370                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4371                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4372                 if (temp != data)
4373                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4374
4375                 /* 2 - disable MGLS in RLC */
4376                 data = RREG32(mmRLC_MEM_SLP_CNTL);
4377                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4378                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4379                         WREG32(mmRLC_MEM_SLP_CNTL, data);
4380                 }
4381
4382                 /* 3 - disable MGLS in CP */
4383                 data = RREG32(mmCP_MEM_SLP_CNTL);
4384                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4385                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4386                         WREG32(mmCP_MEM_SLP_CNTL, data);
4387                 }
4388
4389                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
4390                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4391                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
4392                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
4393                 if (temp != data)
4394                         WREG32(mmCGTS_SM_CTRL_REG, data);
4395
4396                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4397                 gfx_v8_0_wait_for_rlc_serdes(adev);
4398
4399                 /* 6 - set mgcg override */
4400                 fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4401
4402                 udelay(50);
4403
4404                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4405                 gfx_v8_0_wait_for_rlc_serdes(adev);
4406         }
4407 }
4408
4409 static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4410                 bool enable)
4411 {
4412         uint32_t temp, temp1, data, data1;
4413
4414         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
4415
4416         if (enable) {
4417                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
4418                  * Cmp_busy/GFX_Idle interrupts
4419                  */
4420                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4421
4422                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4423                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
4424                 if (temp1 != data1)
4425                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4426
4427                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4428                 gfx_v8_0_wait_for_rlc_serdes(adev);
4429
4430                 /* 3 - clear cgcg override */
4431                 fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4432
4433                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4434                 gfx_v8_0_wait_for_rlc_serdes(adev);
4435
4436                 /* 4 - write cmd to set CGLS */
4437                 fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
4438
4439                 /* 5 - enable cgcg */
4440                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4441
4442                 /* enable cgls*/
4443                 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4444
4445                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4446                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
4447
4448                 if (temp1 != data1)
4449                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4450
4451                 if (temp != data)
4452                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4453         } else {
4454                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
4455                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4456
4457                 /* TEST CGCG */
4458                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4459                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
4460                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
4461                 if (temp1 != data1)
4462                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4463
4464                 /* read gfx register to wake up cgcg */
4465                 RREG32(mmCB_CGTT_SCLK_CTRL);
4466                 RREG32(mmCB_CGTT_SCLK_CTRL);
4467                 RREG32(mmCB_CGTT_SCLK_CTRL);
4468                 RREG32(mmCB_CGTT_SCLK_CTRL);
4469
4470                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4471                 gfx_v8_0_wait_for_rlc_serdes(adev);
4472
4473                 /* write cmd to Set CGCG Overrride */
4474                 fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4475
4476                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4477                 gfx_v8_0_wait_for_rlc_serdes(adev);
4478
4479                 /* write cmd to Clear CGLS */
4480                 fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
4481
4482                 /* disable cgcg, cgls should be disabled too. */
4483                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4484                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4485                 if (temp != data)
4486                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4487         }
4488 }
4489 static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
4490                 bool enable)
4491 {
4492         if (enable) {
4493                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
4494                  * ===  MGCG + MGLS + TS(CG/LS) ===
4495                  */
4496                 fiji_update_medium_grain_clock_gating(adev, enable);
4497                 fiji_update_coarse_grain_clock_gating(adev, enable);
4498         } else {
4499                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
4500                  * ===  CGCG + CGLS ===
4501                  */
4502                 fiji_update_coarse_grain_clock_gating(adev, enable);
4503                 fiji_update_medium_grain_clock_gating(adev, enable);
4504         }
4505         return 0;
4506 }
4507
4508 static int gfx_v8_0_set_clockgating_state(void *handle,
4509                                           enum amd_clockgating_state state)
4510 {
4511         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4512
4513         switch (adev->asic_type) {
4514         case CHIP_FIJI:
4515                 fiji_update_gfx_clock_gating(adev,
4516                                 state == AMD_CG_STATE_GATE ? true : false);
4517                 break;
4518         default:
4519                 break;
4520         }
4521         return 0;
4522 }
4523
4524 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4525 {
4526         u32 rptr;
4527
4528         rptr = ring->adev->wb.wb[ring->rptr_offs];
4529
4530         return rptr;
4531 }
4532
4533 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4534 {
4535         struct amdgpu_device *adev = ring->adev;
4536         u32 wptr;
4537
4538         if (ring->use_doorbell)
4539                 /* XXX check if swapping is necessary on BE */
4540                 wptr = ring->adev->wb.wb[ring->wptr_offs];
4541         else
4542                 wptr = RREG32(mmCP_RB0_WPTR);
4543
4544         return wptr;
4545 }
4546
4547 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4548 {
4549         struct amdgpu_device *adev = ring->adev;
4550
4551         if (ring->use_doorbell) {
4552                 /* XXX check if swapping is necessary on BE */
4553                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
4554                 WDOORBELL32(ring->doorbell_index, ring->wptr);
4555         } else {
4556                 WREG32(mmCP_RB0_WPTR, ring->wptr);
4557                 (void)RREG32(mmCP_RB0_WPTR);
4558         }
4559 }
4560
4561 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4562 {
4563         u32 ref_and_mask, reg_mem_engine;
4564
4565         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
4566                 switch (ring->me) {
4567                 case 1:
4568                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
4569                         break;
4570                 case 2:
4571                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
4572                         break;
4573                 default:
4574                         return;
4575                 }
4576                 reg_mem_engine = 0;
4577         } else {
4578                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
4579                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
4580         }
4581
4582         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4583         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
4584                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
4585                                  reg_mem_engine));
4586         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
4587         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
4588         amdgpu_ring_write(ring, ref_and_mask);
4589         amdgpu_ring_write(ring, ref_and_mask);
4590         amdgpu_ring_write(ring, 0x20); /* poll interval */
4591 }
4592
4593 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4594                                   struct amdgpu_ib *ib)
4595 {
4596         bool need_ctx_switch = ring->current_ctx != ib->ctx;
4597         u32 header, control = 0;
4598         u32 next_rptr = ring->wptr + 5;
4599
4600         /* drop the CE preamble IB for the same context */
4601         if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
4602                 return;
4603
4604         if (need_ctx_switch)
4605                 next_rptr += 2;
4606
4607         next_rptr += 4;
4608         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4609         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4610         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4611         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4612         amdgpu_ring_write(ring, next_rptr);
4613
4614         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
4615         if (need_ctx_switch) {
4616                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4617                 amdgpu_ring_write(ring, 0);
4618         }
4619
4620         if (ib->flags & AMDGPU_IB_FLAG_CE)
4621                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4622         else
4623                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4624
4625         control |= ib->length_dw |
4626                 (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
4627
4628         amdgpu_ring_write(ring, header);
4629         amdgpu_ring_write(ring,
4630 #ifdef __BIG_ENDIAN
4631                           (2 << 0) |
4632 #endif
4633                           (ib->gpu_addr & 0xFFFFFFFC));
4634         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4635         amdgpu_ring_write(ring, control);
4636 }
4637
4638 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4639                                   struct amdgpu_ib *ib)
4640 {
4641         u32 header, control = 0;
4642         u32 next_rptr = ring->wptr + 5;
4643
4644         control |= INDIRECT_BUFFER_VALID;
4645
4646         next_rptr += 4;
4647         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4648         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4649         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4650         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4651         amdgpu_ring_write(ring, next_rptr);
4652
4653         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4654
4655         control |= ib->length_dw |
4656                            (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
4657
4658         amdgpu_ring_write(ring, header);
4659         amdgpu_ring_write(ring,
4660 #ifdef __BIG_ENDIAN
4661                                           (2 << 0) |
4662 #endif
4663                                           (ib->gpu_addr & 0xFFFFFFFC));
4664         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4665         amdgpu_ring_write(ring, control);
4666 }
4667
4668 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
4669                                          u64 seq, unsigned flags)
4670 {
4671         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4672         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4673
4674         /* EVENT_WRITE_EOP - flush caches, send int */
4675         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
4676         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4677                                  EOP_TC_ACTION_EN |
4678                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4679                                  EVENT_INDEX(5)));
4680         amdgpu_ring_write(ring, addr & 0xfffffffc);
4681         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
4682                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4683         amdgpu_ring_write(ring, lower_32_bits(seq));
4684         amdgpu_ring_write(ring, upper_32_bits(seq));
4685
4686 }
4687
4688 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4689                                         unsigned vm_id, uint64_t pd_addr)
4690 {
4691         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4692         uint32_t seq = ring->fence_drv.sync_seq;
4693         uint64_t addr = ring->fence_drv.gpu_addr;
4694
4695         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4696         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
4697                  WAIT_REG_MEM_FUNCTION(3))); /* equal */
4698         amdgpu_ring_write(ring, addr & 0xfffffffc);
4699         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4700         amdgpu_ring_write(ring, seq);
4701         amdgpu_ring_write(ring, 0xffffffff);
4702         amdgpu_ring_write(ring, 4); /* poll interval */
4703
4704         if (usepfp) {
4705                 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
4706                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4707                 amdgpu_ring_write(ring, 0);
4708                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4709                 amdgpu_ring_write(ring, 0);
4710         }
4711
4712         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4713         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
4714                                  WRITE_DATA_DST_SEL(0)) |
4715                                  WR_CONFIRM);
4716         if (vm_id < 8) {
4717                 amdgpu_ring_write(ring,
4718                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
4719         } else {
4720                 amdgpu_ring_write(ring,
4721                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
4722         }
4723         amdgpu_ring_write(ring, 0);
4724         amdgpu_ring_write(ring, pd_addr >> 12);
4725
4726         /* bits 0-15 are the VM contexts0-15 */
4727         /* invalidate the cache */
4728         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4729         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4730                                  WRITE_DATA_DST_SEL(0)));
4731         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4732         amdgpu_ring_write(ring, 0);
4733         amdgpu_ring_write(ring, 1 << vm_id);
4734
4735         /* wait for the invalidate to complete */
4736         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4737         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
4738                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
4739                                  WAIT_REG_MEM_ENGINE(0))); /* me */
4740         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4741         amdgpu_ring_write(ring, 0);
4742         amdgpu_ring_write(ring, 0); /* ref */
4743         amdgpu_ring_write(ring, 0); /* mask */
4744         amdgpu_ring_write(ring, 0x20); /* poll interval */
4745
4746         /* compute doesn't have PFP */
4747         if (usepfp) {
4748                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4749                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4750                 amdgpu_ring_write(ring, 0x0);
4751                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4752                 amdgpu_ring_write(ring, 0);
4753                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4754                 amdgpu_ring_write(ring, 0);
4755         }
4756 }
4757
4758 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4759 {
4760         return ring->adev->wb.wb[ring->rptr_offs];
4761 }
4762
4763 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4764 {
4765         return ring->adev->wb.wb[ring->wptr_offs];
4766 }
4767
4768 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4769 {
4770         struct amdgpu_device *adev = ring->adev;
4771
4772         /* XXX check if swapping is necessary on BE */
4773         adev->wb.wb[ring->wptr_offs] = ring->wptr;
4774         WDOORBELL32(ring->doorbell_index, ring->wptr);
4775 }
4776
4777 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
4778                                              u64 addr, u64 seq,
4779                                              unsigned flags)
4780 {
4781         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4782         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4783
4784         /* RELEASE_MEM - flush caches, send int */
4785         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4786         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4787                                  EOP_TC_ACTION_EN |
4788                                  EOP_TC_WB_ACTION_EN |
4789                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4790                                  EVENT_INDEX(5)));
4791         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4792         amdgpu_ring_write(ring, addr & 0xfffffffc);
4793         amdgpu_ring_write(ring, upper_32_bits(addr));
4794         amdgpu_ring_write(ring, lower_32_bits(seq));
4795         amdgpu_ring_write(ring, upper_32_bits(seq));
4796 }
4797
4798 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4799                                                  enum amdgpu_interrupt_state state)
4800 {
4801         u32 cp_int_cntl;
4802
4803         switch (state) {
4804         case AMDGPU_IRQ_STATE_DISABLE:
4805                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4806                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4807                                             TIME_STAMP_INT_ENABLE, 0);
4808                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4809                 break;
4810         case AMDGPU_IRQ_STATE_ENABLE:
4811                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4812                 cp_int_cntl =
4813                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4814                                       TIME_STAMP_INT_ENABLE, 1);
4815                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4816                 break;
4817         default:
4818                 break;
4819         }
4820 }
4821
4822 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4823                                                      int me, int pipe,
4824                                                      enum amdgpu_interrupt_state state)
4825 {
4826         u32 mec_int_cntl, mec_int_cntl_reg;
4827
4828         /*
4829          * amdgpu controls only pipe 0 of MEC1. That's why this function only
4830          * handles the setting of interrupts for this specific pipe. All other
4831          * pipes' interrupts are set by amdkfd.
4832          */
4833
4834         if (me == 1) {
4835                 switch (pipe) {
4836                 case 0:
4837                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4838                         break;
4839                 default:
4840                         DRM_DEBUG("invalid pipe %d\n", pipe);
4841                         return;
4842                 }
4843         } else {
4844                 DRM_DEBUG("invalid me %d\n", me);
4845                 return;
4846         }
4847
4848         switch (state) {
4849         case AMDGPU_IRQ_STATE_DISABLE:
4850                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4851                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4852                                              TIME_STAMP_INT_ENABLE, 0);
4853                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4854                 break;
4855         case AMDGPU_IRQ_STATE_ENABLE:
4856                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4857                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4858                                              TIME_STAMP_INT_ENABLE, 1);
4859                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4860                 break;
4861         default:
4862                 break;
4863         }
4864 }
4865
4866 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4867                                              struct amdgpu_irq_src *source,
4868                                              unsigned type,
4869                                              enum amdgpu_interrupt_state state)
4870 {
4871         u32 cp_int_cntl;
4872
4873         switch (state) {
4874         case AMDGPU_IRQ_STATE_DISABLE:
4875                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4876                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4877                                             PRIV_REG_INT_ENABLE, 0);
4878                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4879                 break;
4880         case AMDGPU_IRQ_STATE_ENABLE:
4881                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4882                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4883                                             PRIV_REG_INT_ENABLE, 0);
4884                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4885                 break;
4886         default:
4887                 break;
4888         }
4889
4890         return 0;
4891 }
4892
4893 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4894                                               struct amdgpu_irq_src *source,
4895                                               unsigned type,
4896                                               enum amdgpu_interrupt_state state)
4897 {
4898         u32 cp_int_cntl;
4899
4900         switch (state) {
4901         case AMDGPU_IRQ_STATE_DISABLE:
4902                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4903                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4904                                             PRIV_INSTR_INT_ENABLE, 0);
4905                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4906                 break;
4907         case AMDGPU_IRQ_STATE_ENABLE:
4908                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4909                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4910                                             PRIV_INSTR_INT_ENABLE, 1);
4911                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4912                 break;
4913         default:
4914                 break;
4915         }
4916
4917         return 0;
4918 }
4919
4920 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4921                                             struct amdgpu_irq_src *src,
4922                                             unsigned type,
4923                                             enum amdgpu_interrupt_state state)
4924 {
4925         switch (type) {
4926         case AMDGPU_CP_IRQ_GFX_EOP:
4927                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
4928                 break;
4929         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4930                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4931                 break;
4932         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4933                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4934                 break;
4935         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4936                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4937                 break;
4938         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4939                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4940                 break;
4941         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4942                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4943                 break;
4944         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4945                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4946                 break;
4947         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4948                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4949                 break;
4950         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4951                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4952                 break;
4953         default:
4954                 break;
4955         }
4956         return 0;
4957 }
4958
4959 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
4960                             struct amdgpu_irq_src *source,
4961                             struct amdgpu_iv_entry *entry)
4962 {
4963         int i;
4964         u8 me_id, pipe_id, queue_id;
4965         struct amdgpu_ring *ring;
4966
4967         DRM_DEBUG("IH: CP EOP\n");
4968         me_id = (entry->ring_id & 0x0c) >> 2;
4969         pipe_id = (entry->ring_id & 0x03) >> 0;
4970         queue_id = (entry->ring_id & 0x70) >> 4;
4971
4972         switch (me_id) {
4973         case 0:
4974                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4975                 break;
4976         case 1:
4977         case 2:
4978                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4979                         ring = &adev->gfx.compute_ring[i];
4980                         /* Per-queue interrupt is supported for MEC starting from VI.
4981                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
4982                           */
4983                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
4984                                 amdgpu_fence_process(ring);
4985                 }
4986                 break;
4987         }
4988         return 0;
4989 }
4990
4991 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
4992                                  struct amdgpu_irq_src *source,
4993                                  struct amdgpu_iv_entry *entry)
4994 {
4995         DRM_ERROR("Illegal register access in command stream\n");
4996         schedule_work(&adev->reset_work);
4997         return 0;
4998 }
4999
5000 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
5001                                   struct amdgpu_irq_src *source,
5002                                   struct amdgpu_iv_entry *entry)
5003 {
5004         DRM_ERROR("Illegal instruction in command stream\n");
5005         schedule_work(&adev->reset_work);
5006         return 0;
5007 }
5008
5009 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
5010         .early_init = gfx_v8_0_early_init,
5011         .late_init = gfx_v8_0_late_init,
5012         .sw_init = gfx_v8_0_sw_init,
5013         .sw_fini = gfx_v8_0_sw_fini,
5014         .hw_init = gfx_v8_0_hw_init,
5015         .hw_fini = gfx_v8_0_hw_fini,
5016         .suspend = gfx_v8_0_suspend,
5017         .resume = gfx_v8_0_resume,
5018         .is_idle = gfx_v8_0_is_idle,
5019         .wait_for_idle = gfx_v8_0_wait_for_idle,
5020         .soft_reset = gfx_v8_0_soft_reset,
5021         .print_status = gfx_v8_0_print_status,
5022         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
5023         .set_powergating_state = gfx_v8_0_set_powergating_state,
5024 };
5025
5026 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5027         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
5028         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
5029         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
5030         .parse_cs = NULL,
5031         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
5032         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
5033         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5034         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5035         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5036         .test_ring = gfx_v8_0_ring_test_ring,
5037         .test_ib = gfx_v8_0_ring_test_ib,
5038         .insert_nop = amdgpu_ring_insert_nop,
5039         .pad_ib = amdgpu_ring_generic_pad_ib,
5040 };
5041
5042 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5043         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
5044         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
5045         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
5046         .parse_cs = NULL,
5047         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
5048         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
5049         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5050         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5051         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5052         .test_ring = gfx_v8_0_ring_test_ring,
5053         .test_ib = gfx_v8_0_ring_test_ib,
5054         .insert_nop = amdgpu_ring_insert_nop,
5055         .pad_ib = amdgpu_ring_generic_pad_ib,
5056 };
5057
5058 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5059 {
5060         int i;
5061
5062         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5063                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5064
5065         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5066                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5067 }
5068
5069 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
5070         .set = gfx_v8_0_set_eop_interrupt_state,
5071         .process = gfx_v8_0_eop_irq,
5072 };
5073
5074 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
5075         .set = gfx_v8_0_set_priv_reg_fault_state,
5076         .process = gfx_v8_0_priv_reg_irq,
5077 };
5078
5079 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
5080         .set = gfx_v8_0_set_priv_inst_fault_state,
5081         .process = gfx_v8_0_priv_inst_irq,
5082 };
5083
5084 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5085 {
5086         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5087         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5088
5089         adev->gfx.priv_reg_irq.num_types = 1;
5090         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5091
5092         adev->gfx.priv_inst_irq.num_types = 1;
5093         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
5094 }
5095
5096 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
5097 {
5098         /* init asci gds info */
5099         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5100         adev->gds.gws.total_size = 64;
5101         adev->gds.oa.total_size = 16;
5102
5103         if (adev->gds.mem.total_size == 64 * 1024) {
5104                 adev->gds.mem.gfx_partition_size = 4096;
5105                 adev->gds.mem.cs_partition_size = 4096;
5106
5107                 adev->gds.gws.gfx_partition_size = 4;
5108                 adev->gds.gws.cs_partition_size = 4;
5109
5110                 adev->gds.oa.gfx_partition_size = 4;
5111                 adev->gds.oa.cs_partition_size = 1;
5112         } else {
5113                 adev->gds.mem.gfx_partition_size = 1024;
5114                 adev->gds.mem.cs_partition_size = 1024;
5115
5116                 adev->gds.gws.gfx_partition_size = 16;
5117                 adev->gds.gws.cs_partition_size = 16;
5118
5119                 adev->gds.oa.gfx_partition_size = 4;
5120                 adev->gds.oa.cs_partition_size = 4;
5121         }
5122 }
5123
5124 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5125 {
5126         u32 data, mask;
5127
5128         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5129         data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5130
5131         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5132         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5133
5134         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
5135                                        adev->gfx.config.max_sh_per_se);
5136
5137         return (~data) & mask;
5138 }
5139
5140 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5141                          struct amdgpu_cu_info *cu_info)
5142 {
5143         int i, j, k, counter, active_cu_number = 0;
5144         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5145
5146         if (!adev || !cu_info)
5147                 return -EINVAL;
5148
5149         mutex_lock(&adev->grbm_idx_mutex);
5150         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5151                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5152                         mask = 1;
5153                         ao_bitmap = 0;
5154                         counter = 0;
5155                         gfx_v8_0_select_se_sh(adev, i, j);
5156                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
5157                         cu_info->bitmap[i][j] = bitmap;
5158
5159                         for (k = 0; k < 16; k ++) {
5160                                 if (bitmap & mask) {
5161                                         if (counter < 2)
5162                                                 ao_bitmap |= mask;
5163                                         counter ++;
5164                                 }
5165                                 mask <<= 1;
5166                         }
5167                         active_cu_number += counter;
5168                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5169                 }
5170         }
5171         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5172         mutex_unlock(&adev->grbm_idx_mutex);
5173
5174         cu_info->number = active_cu_number;
5175         cu_info->ao_cu_mask = ao_cu_mask;
5176
5177         return 0;
5178 }
This page took 0.35073 seconds and 4 git commands to generate.