]> Git Repo - linux.git/blob - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
Merge branch '2016-02-26-st-drm-next' of http://git.linaro.org/people/benjamin.gaigna...
[linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
31
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
34
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
40
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
45
46 #include "dce/dce_10_0_d.h"
47 #include "dce/dce_10_0_sh_mask.h"
48
/*
 * Ring counts for GFX8.
 * NOTE(review): presumably the number of graphics and compute rings this
 * driver sets up -- the users of these constants are outside this view.
 */
49 #define GFX8_NUM_GFX_RINGS     1
50 #define GFX8_NUM_COMPUTE_RINGS 8
51
/*
 * Per-ASIC GB_ADDR_CONFIG values. The Topaz/Carrizo value equals the
 * mmGB_ADDR_CONFIG entry in iceland_golden_common_all and
 * cz_golden_common_all; the Tonga value equals the one in
 * tonga_golden_common_all.
 */
52 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
53 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
54 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
55
/*
 * Field helpers: each macro shifts x to the bit position of the named field
 * using the GB_TILE_MODE0 / GB_MACROTILE_MODE0 *__SHIFT constants. No mask
 * is applied, so x must already fit in the field.
 */
56 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
57 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
58 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
59 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
60 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
61 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
62 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
63 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
64 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
65
/*
 * Single-bit masks (bits 0 through 5) named after RLC_CGTT_MGCG_OVERRIDE.
 * NOTE(review): defined locally, presumably because the included register
 * headers do not provide them -- confirm.
 */
66 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
67 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
68 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
69 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
70 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
71 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
72
73 /* BPM SERDES command values: 1 = set, 0 = clear (CLE) */
74 #define SET_BPM_SERDES_CMD    1
75 #define CLE_BPM_SERDES_CMD    0
76
77 /* BPM register addresses, numbered sequentially from 0 */
78 enum {
79         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
80         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
81         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
82         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
83         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
84         BPM_REG_FGCG_MAX            /* Last enumerator: one past the final register (5) */
85 };
86
/*
 * Firmware images declared for this module, grouped per ASIC. Every group
 * lists ce, pfp, me, mec and rlc images; carrizo, tonga and fiji
 * additionally list a mec2 image, while stoney and topaz do not.
 */
87 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
88 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
90 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
91 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
93
94 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
95 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
97 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
98 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
99
100 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
101 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
103 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
104 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
108 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
110 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
111 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
114 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
116 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
117 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
119
/*
 * GDS register offsets, one entry per VMID 0..15. Each entry lists that
 * VMID's BASE, SIZE, GWS and OA registers, in that order.
 * NOTE(review): the member names of struct amdgpu_gds_reg_offset and the
 * users of this table are outside this view.
 */
120 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
121 {
122         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
123         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
124         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
125         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
126         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
127         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
128         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
129         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
130         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
131         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
132         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
133         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
134         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
135         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
136         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
137         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
138 };
139
/*
 * Tonga golden settings: three u32 values per row, an mm* register first.
 * Passed second (after tonga_mgcg_cgcg_init) to
 * amdgpu_program_register_sequence() by gfx_v8_0_init_golden_registers()
 * for CHIP_TONGA.
 * NOTE(review): the second and third values look like an AND mask and the
 * value applied under it -- confirm against
 * amdgpu_program_register_sequence().
 */
140 static const u32 golden_settings_tonga_a11[] =
141 {
142         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
143         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
144         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
145         mmGB_GPU_ID, 0x0000000f, 0x00000000,
146         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
147         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
148         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
149         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
150         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
151         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
152         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
153         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
154         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
155         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
156         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
157 };
158
/*
 * Tonga common golden values: three u32 values per row, an mm* register
 * first. Passed last to amdgpu_program_register_sequence() by
 * gfx_v8_0_init_golden_registers() for CHIP_TONGA.
 * NOTE(review): rows presumably read as register, AND mask, value --
 * confirm against amdgpu_program_register_sequence().
 */
159 static const u32 tonga_golden_common_all[] =
160 {
161         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
162         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
163         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
164         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
165         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
166         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
167         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
168         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
169 };
170
/*
 * Tonga MGCG/CGCG init values: three u32 values per row, an mm* register
 * first. Passed first to amdgpu_program_register_sequence() by
 * gfx_v8_0_init_golden_registers() for CHIP_TONGA. Covers the CGTT_*
 * clock-control registers and the CGTS control registers of CU0..CU7.
 * NOTE(review): rows presumably read as register, AND mask, value --
 * confirm against amdgpu_program_register_sequence().
 */
171 static const u32 tonga_mgcg_cgcg_init[] =
172 {
173         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
174         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
175         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
176         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
177         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
178         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
179         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
180         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
181         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
182         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
183         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
184         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
185         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
186         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
187         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
188         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
189         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
190         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
191         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
192         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
193         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
194         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
195         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
196         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
198         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
199         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
200         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
201         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
202         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
203         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
204         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
205         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
206         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
207         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
208         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
209         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
210         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
211         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
212         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
213         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
214         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
215         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
216         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
217         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
218         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
219         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
220         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
221         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
222         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
223         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
224         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
225         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
226         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
227         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
228         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
229         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
230         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
231         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
232         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
233         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
234         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
235         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
236         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
237         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
238         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
239         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
240         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
241         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
242         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
243         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
244         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
245         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
246         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
247         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
248 };
249
/*
 * Fiji common golden values: three u32 values per row, an mm* register
 * first. Passed last to amdgpu_program_register_sequence() by
 * gfx_v8_0_init_golden_registers() for CHIP_FIJI.
 * NOTE(review): rows presumably read as register, AND mask, value --
 * confirm against amdgpu_program_register_sequence().
 */
250 static const u32 fiji_golden_common_all[] =
251 {
252         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
253         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
254         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
255         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
256         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
257         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
258         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
259         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
260         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
261         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
262 };
263
/*
 * Fiji golden settings: three u32 values per row, an mm* register first.
 * Passed second (after fiji_mgcg_cgcg_init) to
 * amdgpu_program_register_sequence() by gfx_v8_0_init_golden_registers()
 * for CHIP_FIJI.
 * NOTE(review): rows presumably read as register, AND mask, value --
 * confirm against amdgpu_program_register_sequence().
 */
264 static const u32 golden_settings_fiji_a10[] =
265 {
266         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
267         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
268         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
269         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
270         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
271         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
272         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
273         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
274         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
275         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
276         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
277 };
278
/*
 * Fiji MGCG/CGCG init values: three u32 values per row, an mm* register
 * first. Passed first to amdgpu_program_register_sequence() by
 * gfx_v8_0_init_golden_registers() for CHIP_FIJI. Unlike the Tonga,
 * Iceland and Carrizo tables, this one has no per-CU CGTS_CUn_* rows.
 * NOTE(review): rows presumably read as register, AND mask, value --
 * confirm against amdgpu_program_register_sequence().
 */
279 static const u32 fiji_mgcg_cgcg_init[] =
280 {
281         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
282         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
283         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
284         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
285         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
286         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
287         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
288         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
289         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
290         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
291         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
292         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
293         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
294         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
295         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
296         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
297         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
298         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
299         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
300         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
301         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
302         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
303         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
304         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
305         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
306         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
307         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
308         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
309         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
310         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
311         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
312         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
313         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
314         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
315         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
316 };
317
/*
 * Iceland golden settings: three u32 values per row, an mm* register first.
 * Passed second (after iceland_mgcg_cgcg_init) to
 * amdgpu_program_register_sequence() by gfx_v8_0_init_golden_registers()
 * for CHIP_TOPAZ.
 * NOTE(review): rows presumably read as register, AND mask, value --
 * confirm against amdgpu_program_register_sequence().
 */
318 static const u32 golden_settings_iceland_a11[] =
319 {
320         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
321         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
322         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
323         mmGB_GPU_ID, 0x0000000f, 0x00000000,
324         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
325         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
326         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
327         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
328         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
329         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
330         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
331         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
332         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
333         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
334         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
335 };
336
/*
 * Iceland common golden values: three u32 values per row, an mm* register
 * first. Passed last to amdgpu_program_register_sequence() by
 * gfx_v8_0_init_golden_registers() for CHIP_TOPAZ.
 * NOTE(review): rows presumably read as register, AND mask, value --
 * confirm against amdgpu_program_register_sequence().
 */
337 static const u32 iceland_golden_common_all[] =
338 {
339         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
340         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
341         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
342         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
343         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
344         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
345         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
346         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
347 };
348
/*
 * Iceland MGCG/CGCG init values: three u32 values per row, an mm* register
 * first. Passed first to amdgpu_program_register_sequence() by
 * gfx_v8_0_init_golden_registers() for CHIP_TOPAZ. The per-CU CGTS rows
 * cover CU0..CU5 only, and there is no mmCP_MEM_SLP_CNTL row.
 * NOTE(review): rows presumably read as register, AND mask, value --
 * confirm against amdgpu_program_register_sequence().
 */
349 static const u32 iceland_mgcg_cgcg_init[] =
350 {
351         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
352         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
353         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
354         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
355         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
356         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
357         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
358         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
359         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
360         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
361         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
362         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
363         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
364         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
365         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
366         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
367         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
368         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
369         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
370         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
371         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
372         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
373         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
374         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
375         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
376         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
377         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
378         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
379         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
380         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
381         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
382         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
383         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
384         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
385         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
386         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
387         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
388         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
389         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
390         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
391         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
392         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
393         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
394         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
395         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
396         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
397         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
398         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
399         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
400         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
401         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
402         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
403         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
404         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
405         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
406         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
407         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
408         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
409         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
410         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
411         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
412         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
413         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
414         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
415 };
416
/*
 * Carrizo golden settings: three u32 values per row, an mm* register first.
 * Passed second (after cz_mgcg_cgcg_init) to
 * amdgpu_program_register_sequence() by gfx_v8_0_init_golden_registers()
 * for CHIP_CARRIZO.
 * NOTE(review): rows presumably read as register, AND mask, value --
 * confirm against amdgpu_program_register_sequence(). If so, the
 * mmTCP_ADDR_CONFIG value 0x000000f3 has bits outside its 0x0000000f
 * mask; verify that this is intended.
 */
417 static const u32 cz_golden_settings_a11[] =
418 {
419         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
420         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
421         mmGB_GPU_ID, 0x0000000f, 0x00000000,
422         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
423         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
424         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
425         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
426         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
428         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
429 };
430
/*
 * Carrizo common golden values: three u32 values per row, an mm* register
 * first. Passed last to amdgpu_program_register_sequence() by
 * gfx_v8_0_init_golden_registers() for CHIP_CARRIZO.
 * NOTE(review): rows presumably read as register, AND mask, value --
 * confirm against amdgpu_program_register_sequence().
 */
431 static const u32 cz_golden_common_all[] =
432 {
433         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
434         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
435         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
436         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
437         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
438         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
439         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
440         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
441 };
442
/*
 * Carrizo MGCG/CGCG init values: three u32 values per row, an mm* register
 * first. Passed first to amdgpu_program_register_sequence() by
 * gfx_v8_0_init_golden_registers() for CHIP_CARRIZO. Covers the CGTT_*
 * clock-control registers and the CGTS control registers of CU0..CU7.
 * NOTE(review): rows presumably read as register, AND mask, value --
 * confirm against amdgpu_program_register_sequence().
 */
443 static const u32 cz_mgcg_cgcg_init[] =
444 {
445         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
446         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
447         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
452         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
454         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
455         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
456         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
458         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
459         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
460         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
461         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
462         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
463         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
464         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
465         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
466         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
467         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
468         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
469         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
470         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
471         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
472         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
473         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
474         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
475         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
476         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
479         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
484         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
487         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
488         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
489         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
490         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
491         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
492         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
493         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
494         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
495         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
496         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
497         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
498         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
499         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
500         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
501         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
502         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
503         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
504         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
505         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
506         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
507         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
508         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
509         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
510         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
511         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
512         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
513         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
514         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
515         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
516         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
517         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
518         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
519         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
520 };
521
/*
 * Stoney golden settings: three u32 values per row, an mm* register first.
 * Passed second (after stoney_mgcg_cgcg_init) to
 * amdgpu_program_register_sequence() by gfx_v8_0_init_golden_registers()
 * for CHIP_STONEY.
 * NOTE(review): rows presumably read as register, AND mask, value --
 * confirm against amdgpu_program_register_sequence().
 */
522 static const u32 stoney_golden_settings_a11[] =
523 {
524         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
525         mmGB_GPU_ID, 0x0000000f, 0x00000000,
526         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
527         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
528         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
529         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
530         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
531         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
532         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
533         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
534 };
535
/*
 * Stoney common golden values: three u32 values per row, an mm* register
 * first. Passed last to amdgpu_program_register_sequence() by
 * gfx_v8_0_init_golden_registers() for CHIP_STONEY.
 * NOTE(review): rows presumably read as register, AND mask, value --
 * confirm against amdgpu_program_register_sequence().
 */
536 static const u32 stoney_golden_common_all[] =
537 {
538         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
539         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
540         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
541         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
542         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
543         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
544         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
545         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
546 };
547
/*
 * Stoney MGCG/CGCG init values: three u32 values per row, an mm* register
 * first. Passed first to amdgpu_program_register_sequence() by
 * gfx_v8_0_init_golden_registers() for CHIP_STONEY. Much shorter than the
 * other ASICs' tables: it has no CGTT_* or per-CU CGTS rows.
 * NOTE(review): rows presumably read as register, AND mask, value --
 * confirm against amdgpu_program_register_sequence().
 */
548 static const u32 stoney_mgcg_cgcg_init[] =
549 {
550         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
551         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
552         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
553         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
554         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
555         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
556 };
557
/*
 * Forward declarations of file-local helpers; their definitions are not
 * in this part of the file.
 */
558 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
559 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
560 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
561
562 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
563 {
564         switch (adev->asic_type) {
565         case CHIP_TOPAZ:
566                 amdgpu_program_register_sequence(adev,
567                                                  iceland_mgcg_cgcg_init,
568                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
569                 amdgpu_program_register_sequence(adev,
570                                                  golden_settings_iceland_a11,
571                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
572                 amdgpu_program_register_sequence(adev,
573                                                  iceland_golden_common_all,
574                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
575                 break;
576         case CHIP_FIJI:
577                 amdgpu_program_register_sequence(adev,
578                                                  fiji_mgcg_cgcg_init,
579                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
580                 amdgpu_program_register_sequence(adev,
581                                                  golden_settings_fiji_a10,
582                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
583                 amdgpu_program_register_sequence(adev,
584                                                  fiji_golden_common_all,
585                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
586                 break;
587
588         case CHIP_TONGA:
589                 amdgpu_program_register_sequence(adev,
590                                                  tonga_mgcg_cgcg_init,
591                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
592                 amdgpu_program_register_sequence(adev,
593                                                  golden_settings_tonga_a11,
594                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
595                 amdgpu_program_register_sequence(adev,
596                                                  tonga_golden_common_all,
597                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
598                 break;
599         case CHIP_CARRIZO:
600                 amdgpu_program_register_sequence(adev,
601                                                  cz_mgcg_cgcg_init,
602                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
603                 amdgpu_program_register_sequence(adev,
604                                                  cz_golden_settings_a11,
605                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
606                 amdgpu_program_register_sequence(adev,
607                                                  cz_golden_common_all,
608                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
609                 break;
610         case CHIP_STONEY:
611                 amdgpu_program_register_sequence(adev,
612                                                  stoney_mgcg_cgcg_init,
613                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
614                 amdgpu_program_register_sequence(adev,
615                                                  stoney_golden_settings_a11,
616                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
617                 amdgpu_program_register_sequence(adev,
618                                                  stoney_golden_common_all,
619                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
620                 break;
621         default:
622                 break;
623         }
624 }
625
626 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
627 {
628         int i;
629
630         adev->gfx.scratch.num_reg = 7;
631         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
632         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
633                 adev->gfx.scratch.free[i] = true;
634                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
635         }
636 }
637
638 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
639 {
640         struct amdgpu_device *adev = ring->adev;
641         uint32_t scratch;
642         uint32_t tmp = 0;
643         unsigned i;
644         int r;
645
646         r = amdgpu_gfx_scratch_get(adev, &scratch);
647         if (r) {
648                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
649                 return r;
650         }
651         WREG32(scratch, 0xCAFEDEAD);
652         r = amdgpu_ring_alloc(ring, 3);
653         if (r) {
654                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
655                           ring->idx, r);
656                 amdgpu_gfx_scratch_free(adev, scratch);
657                 return r;
658         }
659         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
660         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
661         amdgpu_ring_write(ring, 0xDEADBEEF);
662         amdgpu_ring_commit(ring);
663
664         for (i = 0; i < adev->usec_timeout; i++) {
665                 tmp = RREG32(scratch);
666                 if (tmp == 0xDEADBEEF)
667                         break;
668                 DRM_UDELAY(1);
669         }
670         if (i < adev->usec_timeout) {
671                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
672                          ring->idx, i);
673         } else {
674                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
675                           ring->idx, scratch, tmp);
676                 r = -EINVAL;
677         }
678         amdgpu_gfx_scratch_free(adev, scratch);
679         return r;
680 }
681
682 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
683 {
684         struct amdgpu_device *adev = ring->adev;
685         struct amdgpu_ib ib;
686         struct fence *f = NULL;
687         uint32_t scratch;
688         uint32_t tmp = 0;
689         unsigned i;
690         int r;
691
692         r = amdgpu_gfx_scratch_get(adev, &scratch);
693         if (r) {
694                 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
695                 return r;
696         }
697         WREG32(scratch, 0xCAFEDEAD);
698         memset(&ib, 0, sizeof(ib));
699         r = amdgpu_ib_get(adev, NULL, 256, &ib);
700         if (r) {
701                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
702                 goto err1;
703         }
704         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
705         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
706         ib.ptr[2] = 0xDEADBEEF;
707         ib.length_dw = 3;
708
709         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
710         if (r)
711                 goto err2;
712
713         r = fence_wait(f, false);
714         if (r) {
715                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
716                 goto err2;
717         }
718         for (i = 0; i < adev->usec_timeout; i++) {
719                 tmp = RREG32(scratch);
720                 if (tmp == 0xDEADBEEF)
721                         break;
722                 DRM_UDELAY(1);
723         }
724         if (i < adev->usec_timeout) {
725                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
726                          ring->idx, i);
727                 goto err2;
728         } else {
729                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
730                           scratch, tmp);
731                 r = -EINVAL;
732         }
733 err2:
734         fence_put(f);
735         amdgpu_ib_free(adev, &ib);
736 err1:
737         amdgpu_gfx_scratch_free(adev, scratch);
738         return r;
739 }
740
741 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
742 {
743         const char *chip_name;
744         char fw_name[30];
745         int err;
746         struct amdgpu_firmware_info *info = NULL;
747         const struct common_firmware_header *header = NULL;
748         const struct gfx_firmware_header_v1_0 *cp_hdr;
749
750         DRM_DEBUG("\n");
751
752         switch (adev->asic_type) {
753         case CHIP_TOPAZ:
754                 chip_name = "topaz";
755                 break;
756         case CHIP_TONGA:
757                 chip_name = "tonga";
758                 break;
759         case CHIP_CARRIZO:
760                 chip_name = "carrizo";
761                 break;
762         case CHIP_FIJI:
763                 chip_name = "fiji";
764                 break;
765         case CHIP_STONEY:
766                 chip_name = "stoney";
767                 break;
768         default:
769                 BUG();
770         }
771
772         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
773         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
774         if (err)
775                 goto out;
776         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
777         if (err)
778                 goto out;
779         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
780         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
781         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
782
783         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
784         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
785         if (err)
786                 goto out;
787         err = amdgpu_ucode_validate(adev->gfx.me_fw);
788         if (err)
789                 goto out;
790         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
791         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
792         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
793
794         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
795         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
796         if (err)
797                 goto out;
798         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
799         if (err)
800                 goto out;
801         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
802         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
803         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
804
805         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
806         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
807         if (err)
808                 goto out;
809         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
810         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
811         adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
812         adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
813
814         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
815         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
816         if (err)
817                 goto out;
818         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
819         if (err)
820                 goto out;
821         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
822         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
823         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
824
825         if ((adev->asic_type != CHIP_STONEY) &&
826             (adev->asic_type != CHIP_TOPAZ)) {
827                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
828                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
829                 if (!err) {
830                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
831                         if (err)
832                                 goto out;
833                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
834                                 adev->gfx.mec2_fw->data;
835                         adev->gfx.mec2_fw_version =
836                                 le32_to_cpu(cp_hdr->header.ucode_version);
837                         adev->gfx.mec2_feature_version =
838                                 le32_to_cpu(cp_hdr->ucode_feature_version);
839                 } else {
840                         err = 0;
841                         adev->gfx.mec2_fw = NULL;
842                 }
843         }
844
845         if (adev->firmware.smu_load) {
846                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
847                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
848                 info->fw = adev->gfx.pfp_fw;
849                 header = (const struct common_firmware_header *)info->fw->data;
850                 adev->firmware.fw_size +=
851                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
852
853                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
854                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
855                 info->fw = adev->gfx.me_fw;
856                 header = (const struct common_firmware_header *)info->fw->data;
857                 adev->firmware.fw_size +=
858                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
859
860                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
861                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
862                 info->fw = adev->gfx.ce_fw;
863                 header = (const struct common_firmware_header *)info->fw->data;
864                 adev->firmware.fw_size +=
865                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
866
867                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
868                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
869                 info->fw = adev->gfx.rlc_fw;
870                 header = (const struct common_firmware_header *)info->fw->data;
871                 adev->firmware.fw_size +=
872                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
873
874                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
875                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
876                 info->fw = adev->gfx.mec_fw;
877                 header = (const struct common_firmware_header *)info->fw->data;
878                 adev->firmware.fw_size +=
879                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
880
881                 if (adev->gfx.mec2_fw) {
882                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
883                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
884                         info->fw = adev->gfx.mec2_fw;
885                         header = (const struct common_firmware_header *)info->fw->data;
886                         adev->firmware.fw_size +=
887                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
888                 }
889
890         }
891
892 out:
893         if (err) {
894                 dev_err(adev->dev,
895                         "gfx8: Failed to load firmware \"%s\"\n",
896                         fw_name);
897                 release_firmware(adev->gfx.pfp_fw);
898                 adev->gfx.pfp_fw = NULL;
899                 release_firmware(adev->gfx.me_fw);
900                 adev->gfx.me_fw = NULL;
901                 release_firmware(adev->gfx.ce_fw);
902                 adev->gfx.ce_fw = NULL;
903                 release_firmware(adev->gfx.rlc_fw);
904                 adev->gfx.rlc_fw = NULL;
905                 release_firmware(adev->gfx.mec_fw);
906                 adev->gfx.mec_fw = NULL;
907                 release_firmware(adev->gfx.mec2_fw);
908                 adev->gfx.mec2_fw = NULL;
909         }
910         return err;
911 }
912
913 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
914 {
915         int r;
916
917         if (adev->gfx.mec.hpd_eop_obj) {
918                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
919                 if (unlikely(r != 0))
920                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
921                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
922                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
923
924                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
925                 adev->gfx.mec.hpd_eop_obj = NULL;
926         }
927 }
928
929 #define MEC_HPD_SIZE 2048
930
931 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
932 {
933         int r;
934         u32 *hpd;
935
936         /*
937          * we assign only 1 pipe because all other pipes will
938          * be handled by KFD
939          */
940         adev->gfx.mec.num_mec = 1;
941         adev->gfx.mec.num_pipe = 1;
942         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
943
944         if (adev->gfx.mec.hpd_eop_obj == NULL) {
945                 r = amdgpu_bo_create(adev,
946                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
947                                      PAGE_SIZE, true,
948                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
949                                      &adev->gfx.mec.hpd_eop_obj);
950                 if (r) {
951                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
952                         return r;
953                 }
954         }
955
956         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
957         if (unlikely(r != 0)) {
958                 gfx_v8_0_mec_fini(adev);
959                 return r;
960         }
961         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
962                           &adev->gfx.mec.hpd_eop_gpu_addr);
963         if (r) {
964                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
965                 gfx_v8_0_mec_fini(adev);
966                 return r;
967         }
968         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
969         if (r) {
970                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
971                 gfx_v8_0_mec_fini(adev);
972                 return r;
973         }
974
975         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
976
977         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
978         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
979
980         return 0;
981 }
982
/*
 * Pre-assembled compute shader, stored as raw 32-bit words.
 * gfx_v8_0_do_edc_gpr_workarounds() copies it into its indirect buffer at
 * vgpr_offset and points mmCOMPUTE_PGM_LO/HI at that copy for the dispatch
 * it labels "VGPR".
 */
983 static const u32 vgpr_init_compute_shader[] =
984 {
985         0x7e000209, 0x7e020208,
986         0x7e040207, 0x7e060206,
987         0x7e080205, 0x7e0a0204,
988         0x7e0c0203, 0x7e0e0202,
989         0x7e100201, 0x7e120200,
990         0x7e140209, 0x7e160208,
991         0x7e180207, 0x7e1a0206,
992         0x7e1c0205, 0x7e1e0204,
993         0x7e200203, 0x7e220202,
994         0x7e240201, 0x7e260200,
995         0x7e280209, 0x7e2a0208,
996         0x7e2c0207, 0x7e2e0206,
997         0x7e300205, 0x7e320204,
998         0x7e340203, 0x7e360202,
999         0x7e380201, 0x7e3a0200,
1000         0x7e3c0209, 0x7e3e0208,
1001         0x7e400207, 0x7e420206,
1002         0x7e440205, 0x7e460204,
1003         0x7e480203, 0x7e4a0202,
1004         0x7e4c0201, 0x7e4e0200,
1005         0x7e500209, 0x7e520208,
1006         0x7e540207, 0x7e560206,
1007         0x7e580205, 0x7e5a0204,
1008         0x7e5c0203, 0x7e5e0202,
1009         0x7e600201, 0x7e620200,
1010         0x7e640209, 0x7e660208,
1011         0x7e680207, 0x7e6a0206,
1012         0x7e6c0205, 0x7e6e0204,
1013         0x7e700203, 0x7e720202,
1014         0x7e740201, 0x7e760200,
1015         0x7e780209, 0x7e7a0208,
1016         0x7e7c0207, 0x7e7e0206,
1017         0xbf8a0000, 0xbf810000,
1018 };
1019
/*
 * Pre-assembled compute shader, stored as raw 32-bit words.
 * gfx_v8_0_do_edc_gpr_workarounds() copies it into its indirect buffer at
 * sgpr_offset; both dispatches it labels "SGPR1" and "SGPR2" run this same
 * copy.
 */
1020 static const u32 sgpr_init_compute_shader[] =
1021 {
1022         0xbe8a0100, 0xbe8c0102,
1023         0xbe8e0104, 0xbe900106,
1024         0xbe920108, 0xbe940100,
1025         0xbe960102, 0xbe980104,
1026         0xbe9a0106, 0xbe9c0108,
1027         0xbe9e0100, 0xbea00102,
1028         0xbea20104, 0xbea40106,
1029         0xbea60108, 0xbea80100,
1030         0xbeaa0102, 0xbeac0104,
1031         0xbeae0106, 0xbeb00108,
1032         0xbeb20100, 0xbeb40102,
1033         0xbeb60104, 0xbeb80106,
1034         0xbeba0108, 0xbebc0100,
1035         0xbebe0102, 0xbec00104,
1036         0xbec20106, 0xbec40108,
1037         0xbec60100, 0xbec80102,
1038         0xbee60004, 0xbee70005,
1039         0xbeea0006, 0xbeeb0007,
1040         0xbee80008, 0xbee90009,
1041         0xbefc0000, 0xbf8a0000,
1042         0xbf810000, 0x00000000,
1043 };
1044
/*
 * Register state for the "VGPR" dispatch, laid out as consecutive
 * (register, value) pairs.  gfx_v8_0_do_edc_gpr_workarounds() walks the
 * array two entries at a time and emits one SET_SH_REG packet per pair.
 */
1045 static const u32 vgpr_init_regs[] =
1046 {
1047         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1048         mmCOMPUTE_RESOURCE_LIMITS, 0,
1049         mmCOMPUTE_NUM_THREAD_X, 256*4,
1050         mmCOMPUTE_NUM_THREAD_Y, 1,
1051         mmCOMPUTE_NUM_THREAD_Z, 1,
1052         mmCOMPUTE_PGM_RSRC2, 20,
1053         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1054         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1055         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1056         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1057         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1058         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1059         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1060         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1061         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1062         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1063 };
1064
/*
 * Register state for the "SGPR1" dispatch, laid out as consecutive
 * (register, value) pairs consumed two entries at a time by
 * gfx_v8_0_do_edc_gpr_workarounds().  It differs from sgpr2_init_regs only
 * in the mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value (0x0f here, 0xf0 there).
 */
1065 static const u32 sgpr1_init_regs[] =
1066 {
1067         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1068         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1069         mmCOMPUTE_NUM_THREAD_X, 256*5,
1070         mmCOMPUTE_NUM_THREAD_Y, 1,
1071         mmCOMPUTE_NUM_THREAD_Z, 1,
1072         mmCOMPUTE_PGM_RSRC2, 20,
1073         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1074         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1075         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1076         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1077         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1078         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1079         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1080         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1081         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1082         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1083 };
1084
/*
 * Register state for the "SGPR2" dispatch, laid out as consecutive
 * (register, value) pairs consumed two entries at a time by
 * gfx_v8_0_do_edc_gpr_workarounds().  It differs from sgpr1_init_regs only
 * in the mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value (0xf0 here, 0x0f there).
 */
1085 static const u32 sgpr2_init_regs[] =
1086 {
1087         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1088         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1089         mmCOMPUTE_NUM_THREAD_X, 256*5,
1090         mmCOMPUTE_NUM_THREAD_Y, 1,
1091         mmCOMPUTE_NUM_THREAD_Z, 1,
1092         mmCOMPUTE_PGM_RSRC2, 20,
1093         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1094         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1095         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1096         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1097         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1098         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1099         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1100         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1101         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1102         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1103 };
1104
/*
 * EDC counter registers.  gfx_v8_0_do_edc_gpr_workarounds() reads each one
 * and discards the value as its last step; per the comment at that loop,
 * the read-back is what clears the counters.
 */
1105 static const u32 sec_ded_counter_registers[] =
1106 {
1107         mmCPC_EDC_ATC_CNT,
1108         mmCPC_EDC_SCRATCH_CNT,
1109         mmCPC_EDC_UCODE_CNT,
1110         mmCPF_EDC_ATC_CNT,
1111         mmCPF_EDC_ROQ_CNT,
1112         mmCPF_EDC_TAG_CNT,
1113         mmCPG_EDC_ATC_CNT,
1114         mmCPG_EDC_DMA_CNT,
1115         mmCPG_EDC_TAG_CNT,
1116         mmDC_EDC_CSINVOC_CNT,
1117         mmDC_EDC_RESTORE_CNT,
1118         mmDC_EDC_STATE_CNT,
1119         mmGDS_EDC_CNT,
1120         mmGDS_EDC_GRBM_CNT,
1121         mmGDS_EDC_OA_DED,
1122         mmSPI_EDC_CNT,
1123         mmSQC_ATC_EDC_GATCL1_CNT,
1124         mmSQC_EDC_CNT,
1125         mmSQ_EDC_DED_CNT,
1126         mmSQ_EDC_INFO,
1127         mmSQ_EDC_SEC_CNT,
1128         mmTCC_EDC_CNT,
1129         mmTCP_ATC_EDC_GATCL1_CNT,
1130         mmTCP_EDC_CNT,
1131         mmTD_EDC_CNT
1132 };
1133
1134 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1135 {
1136         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1137         struct amdgpu_ib ib;
1138         struct fence *f = NULL;
1139         int r, i;
1140         u32 tmp;
1141         unsigned total_size, vgpr_offset, sgpr_offset;
1142         u64 gpu_addr;
1143
1144         /* only supported on CZ */
1145         if (adev->asic_type != CHIP_CARRIZO)
1146                 return 0;
1147
1148         /* bail if the compute ring is not ready */
1149         if (!ring->ready)
1150                 return 0;
1151
1152         tmp = RREG32(mmGB_EDC_MODE);
1153         WREG32(mmGB_EDC_MODE, 0);
1154
1155         total_size =
1156                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1157         total_size +=
1158                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1159         total_size +=
1160                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1161         total_size = ALIGN(total_size, 256);
1162         vgpr_offset = total_size;
1163         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1164         sgpr_offset = total_size;
1165         total_size += sizeof(sgpr_init_compute_shader);
1166
1167         /* allocate an indirect buffer to put the commands in */
1168         memset(&ib, 0, sizeof(ib));
1169         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1170         if (r) {
1171                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1172                 return r;
1173         }
1174
1175         /* load the compute shaders */
1176         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1177                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1178
1179         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1180                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1181
1182         /* init the ib length to 0 */
1183         ib.length_dw = 0;
1184
1185         /* VGPR */
1186         /* write the register state for the compute dispatch */
1187         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1188                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1189                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1190                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1191         }
1192         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1193         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1194         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1195         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1196         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1197         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1198
1199         /* write dispatch packet */
1200         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1201         ib.ptr[ib.length_dw++] = 8; /* x */
1202         ib.ptr[ib.length_dw++] = 1; /* y */
1203         ib.ptr[ib.length_dw++] = 1; /* z */
1204         ib.ptr[ib.length_dw++] =
1205                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1206
1207         /* write CS partial flush packet */
1208         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1209         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1210
1211         /* SGPR1 */
1212         /* write the register state for the compute dispatch */
1213         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1214                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1215                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1216                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1217         }
1218         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1219         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1220         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1221         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1222         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1223         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1224
1225         /* write dispatch packet */
1226         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1227         ib.ptr[ib.length_dw++] = 8; /* x */
1228         ib.ptr[ib.length_dw++] = 1; /* y */
1229         ib.ptr[ib.length_dw++] = 1; /* z */
1230         ib.ptr[ib.length_dw++] =
1231                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1232
1233         /* write CS partial flush packet */
1234         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1235         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1236
1237         /* SGPR2 */
1238         /* write the register state for the compute dispatch */
1239         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1240                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1241                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1242                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1243         }
1244         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1245         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1246         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1247         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1248         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1249         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1250
1251         /* write dispatch packet */
1252         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1253         ib.ptr[ib.length_dw++] = 8; /* x */
1254         ib.ptr[ib.length_dw++] = 1; /* y */
1255         ib.ptr[ib.length_dw++] = 1; /* z */
1256         ib.ptr[ib.length_dw++] =
1257                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1258
1259         /* write CS partial flush packet */
1260         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1261         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1262
1263         /* shedule the ib on the ring */
1264         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1265         if (r) {
1266                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1267                 goto fail;
1268         }
1269
1270         /* wait for the GPU to finish processing the IB */
1271         r = fence_wait(f, false);
1272         if (r) {
1273                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1274                 goto fail;
1275         }
1276
1277         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1278         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1279         WREG32(mmGB_EDC_MODE, tmp);
1280
1281         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1282         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1283         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1284
1285
1286         /* read back registers to clear the counters */
1287         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1288                 RREG32(sec_ded_counter_registers[i]);
1289
1290 fail:
1291         fence_put(f);
1292         amdgpu_ib_free(adev, &ib);
1293
1294         return r;
1295 }
1296
1297 static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1298 {
1299         u32 gb_addr_config;
1300         u32 mc_shared_chmap, mc_arb_ramcfg;
1301         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1302         u32 tmp;
1303
1304         switch (adev->asic_type) {
1305         case CHIP_TOPAZ:
1306                 adev->gfx.config.max_shader_engines = 1;
1307                 adev->gfx.config.max_tile_pipes = 2;
1308                 adev->gfx.config.max_cu_per_sh = 6;
1309                 adev->gfx.config.max_sh_per_se = 1;
1310                 adev->gfx.config.max_backends_per_se = 2;
1311                 adev->gfx.config.max_texture_channel_caches = 2;
1312                 adev->gfx.config.max_gprs = 256;
1313                 adev->gfx.config.max_gs_threads = 32;
1314                 adev->gfx.config.max_hw_contexts = 8;
1315
1316                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1317                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1318                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1319                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1320                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1321                 break;
1322         case CHIP_FIJI:
1323                 adev->gfx.config.max_shader_engines = 4;
1324                 adev->gfx.config.max_tile_pipes = 16;
1325                 adev->gfx.config.max_cu_per_sh = 16;
1326                 adev->gfx.config.max_sh_per_se = 1;
1327                 adev->gfx.config.max_backends_per_se = 4;
1328                 adev->gfx.config.max_texture_channel_caches = 16;
1329                 adev->gfx.config.max_gprs = 256;
1330                 adev->gfx.config.max_gs_threads = 32;
1331                 adev->gfx.config.max_hw_contexts = 8;
1332
1333                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1334                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1335                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1336                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1337                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1338                 break;
1339         case CHIP_TONGA:
1340                 adev->gfx.config.max_shader_engines = 4;
1341                 adev->gfx.config.max_tile_pipes = 8;
1342                 adev->gfx.config.max_cu_per_sh = 8;
1343                 adev->gfx.config.max_sh_per_se = 1;
1344                 adev->gfx.config.max_backends_per_se = 2;
1345                 adev->gfx.config.max_texture_channel_caches = 8;
1346                 adev->gfx.config.max_gprs = 256;
1347                 adev->gfx.config.max_gs_threads = 32;
1348                 adev->gfx.config.max_hw_contexts = 8;
1349
1350                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1351                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1352                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1353                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1354                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1355                 break;
1356         case CHIP_CARRIZO:
1357                 adev->gfx.config.max_shader_engines = 1;
1358                 adev->gfx.config.max_tile_pipes = 2;
1359                 adev->gfx.config.max_sh_per_se = 1;
1360                 adev->gfx.config.max_backends_per_se = 2;
1361
1362                 switch (adev->pdev->revision) {
1363                 case 0xc4:
1364                 case 0x84:
1365                 case 0xc8:
1366                 case 0xcc:
1367                 case 0xe1:
1368                 case 0xe3:
1369                         /* B10 */
1370                         adev->gfx.config.max_cu_per_sh = 8;
1371                         break;
1372                 case 0xc5:
1373                 case 0x81:
1374                 case 0x85:
1375                 case 0xc9:
1376                 case 0xcd:
1377                 case 0xe2:
1378                 case 0xe4:
1379                         /* B8 */
1380                         adev->gfx.config.max_cu_per_sh = 6;
1381                         break;
1382                 case 0xc6:
1383                 case 0xca:
1384                 case 0xce:
1385                 case 0x88:
1386                         /* B6 */
1387                         adev->gfx.config.max_cu_per_sh = 6;
1388                         break;
1389                 case 0xc7:
1390                 case 0x87:
1391                 case 0xcb:
1392                 case 0xe5:
1393                 case 0x89:
1394                 default:
1395                         /* B4 */
1396                         adev->gfx.config.max_cu_per_sh = 4;
1397                         break;
1398                 }
1399
1400                 adev->gfx.config.max_texture_channel_caches = 2;
1401                 adev->gfx.config.max_gprs = 256;
1402                 adev->gfx.config.max_gs_threads = 32;
1403                 adev->gfx.config.max_hw_contexts = 8;
1404
1405                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1406                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1407                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1408                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1409                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1410                 break;
1411         case CHIP_STONEY:
1412                 adev->gfx.config.max_shader_engines = 1;
1413                 adev->gfx.config.max_tile_pipes = 2;
1414                 adev->gfx.config.max_sh_per_se = 1;
1415                 adev->gfx.config.max_backends_per_se = 1;
1416
1417                 switch (adev->pdev->revision) {
1418                 case 0xc0:
1419                 case 0xc1:
1420                 case 0xc2:
1421                 case 0xc4:
1422                 case 0xc8:
1423                 case 0xc9:
1424                         adev->gfx.config.max_cu_per_sh = 3;
1425                         break;
1426                 case 0xd0:
1427                 case 0xd1:
1428                 case 0xd2:
1429                 default:
1430                         adev->gfx.config.max_cu_per_sh = 2;
1431                         break;
1432                 }
1433
1434                 adev->gfx.config.max_texture_channel_caches = 2;
1435                 adev->gfx.config.max_gprs = 256;
1436                 adev->gfx.config.max_gs_threads = 16;
1437                 adev->gfx.config.max_hw_contexts = 8;
1438
1439                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1440                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1441                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1442                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1443                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1444                 break;
1445         default:
1446                 adev->gfx.config.max_shader_engines = 2;
1447                 adev->gfx.config.max_tile_pipes = 4;
1448                 adev->gfx.config.max_cu_per_sh = 2;
1449                 adev->gfx.config.max_sh_per_se = 1;
1450                 adev->gfx.config.max_backends_per_se = 2;
1451                 adev->gfx.config.max_texture_channel_caches = 4;
1452                 adev->gfx.config.max_gprs = 256;
1453                 adev->gfx.config.max_gs_threads = 32;
1454                 adev->gfx.config.max_hw_contexts = 8;
1455
1456                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1457                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1458                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1459                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1460                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1461                 break;
1462         }
1463
1464         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1465         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1466         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1467
1468         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1469         adev->gfx.config.mem_max_burst_length_bytes = 256;
1470         if (adev->flags & AMD_IS_APU) {
1471                 /* Get memory bank mapping mode. */
1472                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1473                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1474                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1475
1476                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1477                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1478                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1479
1480                 /* Validate settings in case only one DIMM installed. */
1481                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1482                         dimm00_addr_map = 0;
1483                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1484                         dimm01_addr_map = 0;
1485                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1486                         dimm10_addr_map = 0;
1487                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1488                         dimm11_addr_map = 0;
1489
1490                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1491                 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1492                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1493                         adev->gfx.config.mem_row_size_in_kb = 2;
1494                 else
1495                         adev->gfx.config.mem_row_size_in_kb = 1;
1496         } else {
1497                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1498                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1499                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1500                         adev->gfx.config.mem_row_size_in_kb = 4;
1501         }
1502
1503         adev->gfx.config.shader_engine_tile_size = 32;
1504         adev->gfx.config.num_gpus = 1;
1505         adev->gfx.config.multi_gpu_tile_size = 64;
1506
1507         /* fix up row size */
1508         switch (adev->gfx.config.mem_row_size_in_kb) {
1509         case 1:
1510         default:
1511                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1512                 break;
1513         case 2:
1514                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1515                 break;
1516         case 4:
1517                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1518                 break;
1519         }
1520         adev->gfx.config.gb_addr_config = gb_addr_config;
1521 }
1522
1523 static int gfx_v8_0_sw_init(void *handle)
1524 {
1525         int i, r;
1526         struct amdgpu_ring *ring;
1527         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1528
1529         /* EOP Event */
1530         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1531         if (r)
1532                 return r;
1533
1534         /* Privileged reg */
1535         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1536         if (r)
1537                 return r;
1538
1539         /* Privileged inst */
1540         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1541         if (r)
1542                 return r;
1543
1544         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1545
1546         gfx_v8_0_scratch_init(adev);
1547
1548         r = gfx_v8_0_init_microcode(adev);
1549         if (r) {
1550                 DRM_ERROR("Failed to load gfx firmware!\n");
1551                 return r;
1552         }
1553
1554         r = gfx_v8_0_mec_init(adev);
1555         if (r) {
1556                 DRM_ERROR("Failed to init MEC BOs!\n");
1557                 return r;
1558         }
1559
1560         /* set up the gfx ring */
1561         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1562                 ring = &adev->gfx.gfx_ring[i];
1563                 ring->ring_obj = NULL;
1564                 sprintf(ring->name, "gfx");
1565                 /* no gfx doorbells on iceland */
1566                 if (adev->asic_type != CHIP_TOPAZ) {
1567                         ring->use_doorbell = true;
1568                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1569                 }
1570
1571                 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1572                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1573                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1574                                      AMDGPU_RING_TYPE_GFX);
1575                 if (r)
1576                         return r;
1577         }
1578
1579         /* set up the compute queues */
1580         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1581                 unsigned irq_type;
1582
1583                 /* max 32 queues per MEC */
1584                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1585                         DRM_ERROR("Too many (%d) compute rings!\n", i);
1586                         break;
1587                 }
1588                 ring = &adev->gfx.compute_ring[i];
1589                 ring->ring_obj = NULL;
1590                 ring->use_doorbell = true;
1591                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1592                 ring->me = 1; /* first MEC */
1593                 ring->pipe = i / 8;
1594                 ring->queue = i % 8;
1595                 sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1596                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1597                 /* type-2 packets are deprecated on MEC, use type-3 instead */
1598                 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1599                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1600                                      &adev->gfx.eop_irq, irq_type,
1601                                      AMDGPU_RING_TYPE_COMPUTE);
1602                 if (r)
1603                         return r;
1604         }
1605
1606         /* reserve GDS, GWS and OA resource for gfx */
1607         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1608                         PAGE_SIZE, true,
1609                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1610                         NULL, &adev->gds.gds_gfx_bo);
1611         if (r)
1612                 return r;
1613
1614         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1615                 PAGE_SIZE, true,
1616                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1617                 NULL, &adev->gds.gws_gfx_bo);
1618         if (r)
1619                 return r;
1620
1621         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1622                         PAGE_SIZE, true,
1623                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1624                         NULL, &adev->gds.oa_gfx_bo);
1625         if (r)
1626                 return r;
1627
1628         adev->gfx.ce_ram_size = 0x8000;
1629
1630         gfx_v8_0_gpu_early_init(adev);
1631
1632         return 0;
1633 }
1634
1635 static int gfx_v8_0_sw_fini(void *handle)
1636 {
1637         int i;
1638         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1639
1640         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1641         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1642         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1643
1644         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1645                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1646         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1647                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1648
1649         gfx_v8_0_mec_fini(adev);
1650
1651         return 0;
1652 }
1653
1654 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1655 {
1656         uint32_t *modearray, *mod2array;
1657         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1658         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1659         u32 reg_offset;
1660
1661         modearray = adev->gfx.config.tile_mode_array;
1662         mod2array = adev->gfx.config.macrotile_mode_array;
1663
1664         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1665                 modearray[reg_offset] = 0;
1666
1667         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
1668                 mod2array[reg_offset] = 0;
1669
1670         switch (adev->asic_type) {
1671         case CHIP_TOPAZ:
1672                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1673                                 PIPE_CONFIG(ADDR_SURF_P2) |
1674                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1675                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1676                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1677                                 PIPE_CONFIG(ADDR_SURF_P2) |
1678                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1679                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1680                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1681                                 PIPE_CONFIG(ADDR_SURF_P2) |
1682                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1683                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1684                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1685                                 PIPE_CONFIG(ADDR_SURF_P2) |
1686                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1687                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1688                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1689                                 PIPE_CONFIG(ADDR_SURF_P2) |
1690                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1691                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1692                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1693                                 PIPE_CONFIG(ADDR_SURF_P2) |
1694                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1695                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1696                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1697                                 PIPE_CONFIG(ADDR_SURF_P2) |
1698                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1699                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1700                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1701                                 PIPE_CONFIG(ADDR_SURF_P2));
1702                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1703                                 PIPE_CONFIG(ADDR_SURF_P2) |
1704                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1705                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1706                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1707                                  PIPE_CONFIG(ADDR_SURF_P2) |
1708                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1709                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1710                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1711                                  PIPE_CONFIG(ADDR_SURF_P2) |
1712                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1713                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1714                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1715                                  PIPE_CONFIG(ADDR_SURF_P2) |
1716                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1717                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1718                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1719                                  PIPE_CONFIG(ADDR_SURF_P2) |
1720                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1721                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1722                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1723                                  PIPE_CONFIG(ADDR_SURF_P2) |
1724                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1725                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1726                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1727                                  PIPE_CONFIG(ADDR_SURF_P2) |
1728                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1729                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1730                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1731                                  PIPE_CONFIG(ADDR_SURF_P2) |
1732                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1733                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1734                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1735                                  PIPE_CONFIG(ADDR_SURF_P2) |
1736                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1737                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1738                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1739                                  PIPE_CONFIG(ADDR_SURF_P2) |
1740                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1741                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1742                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1743                                  PIPE_CONFIG(ADDR_SURF_P2) |
1744                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1745                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1746                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1747                                  PIPE_CONFIG(ADDR_SURF_P2) |
1748                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1749                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1750                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1751                                  PIPE_CONFIG(ADDR_SURF_P2) |
1752                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1753                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1754                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1755                                  PIPE_CONFIG(ADDR_SURF_P2) |
1756                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1757                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1758                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1759                                  PIPE_CONFIG(ADDR_SURF_P2) |
1760                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1761                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1762                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1763                                  PIPE_CONFIG(ADDR_SURF_P2) |
1764                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1765                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1766                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1767                                  PIPE_CONFIG(ADDR_SURF_P2) |
1768                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1769                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1770                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1771                                  PIPE_CONFIG(ADDR_SURF_P2) |
1772                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1773                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1774
1775                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1776                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1777                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1778                                 NUM_BANKS(ADDR_SURF_8_BANK));
1779                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1780                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1781                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1782                                 NUM_BANKS(ADDR_SURF_8_BANK));
1783                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1784                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1785                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1786                                 NUM_BANKS(ADDR_SURF_8_BANK));
1787                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1788                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1789                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1790                                 NUM_BANKS(ADDR_SURF_8_BANK));
1791                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1792                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1793                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1794                                 NUM_BANKS(ADDR_SURF_8_BANK));
1795                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1796                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1797                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1798                                 NUM_BANKS(ADDR_SURF_8_BANK));
1799                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1800                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1801                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1802                                 NUM_BANKS(ADDR_SURF_8_BANK));
1803                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1804                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1805                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1806                                 NUM_BANKS(ADDR_SURF_16_BANK));
1807                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1808                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1809                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1810                                 NUM_BANKS(ADDR_SURF_16_BANK));
1811                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1812                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1813                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1814                                  NUM_BANKS(ADDR_SURF_16_BANK));
1815                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1816                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1817                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1818                                  NUM_BANKS(ADDR_SURF_16_BANK));
1819                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1820                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1821                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1822                                  NUM_BANKS(ADDR_SURF_16_BANK));
1823                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1824                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1825                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1826                                  NUM_BANKS(ADDR_SURF_16_BANK));
1827                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1828                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1829                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1830                                  NUM_BANKS(ADDR_SURF_8_BANK));
1831
1832                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1833                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1834                             reg_offset != 23)
1835                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1836
1837                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1838                         if (reg_offset != 7)
1839                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1840
1841                 break;
1842         case CHIP_FIJI:
1843                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1844                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1845                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1846                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1847                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1848                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1849                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1850                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1851                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1852                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1853                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1854                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1855                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1856                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1857                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1858                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1859                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1860                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1861                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1862                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1863                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1864                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1865                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1866                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1867                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1868                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1869                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1870                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1871                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1872                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1873                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1874                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1875                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1876                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1877                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1878                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1879                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1880                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1881                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1882                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1883                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1884                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1885                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1886                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1887                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1888                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1889                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1890                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1891                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1892                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1893                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1894                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1895                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1896                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1897                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1898                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1899                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1900                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1901                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1902                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1903                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1904                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1905                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1906                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1907                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1908                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1909                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1910                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1911                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1912                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1913                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1914                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1915                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1916                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1917                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1918                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1919                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1920                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1921                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1922                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1923                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1924                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1925                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1926                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1927                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1928                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1929                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1930                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1931                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1932                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1933                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1934                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1935                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1936                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1937                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1938                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1939                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1940                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1941                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1942                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1943                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1944                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1945                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1946                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1947                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1948                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1949                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1950                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1951                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1952                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1953                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1954                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1955                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1956                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1957                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1958                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1959                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1960                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1961                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1962                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1963                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1964                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1965
1966                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1967                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1968                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1969                                 NUM_BANKS(ADDR_SURF_8_BANK));
1970                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1971                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1972                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1973                                 NUM_BANKS(ADDR_SURF_8_BANK));
1974                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1975                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1976                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1977                                 NUM_BANKS(ADDR_SURF_8_BANK));
1978                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1979                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1980                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1981                                 NUM_BANKS(ADDR_SURF_8_BANK));
1982                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1983                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1984                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1985                                 NUM_BANKS(ADDR_SURF_8_BANK));
1986                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1987                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1988                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1989                                 NUM_BANKS(ADDR_SURF_8_BANK));
1990                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1991                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1992                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1993                                 NUM_BANKS(ADDR_SURF_8_BANK));
1994                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1995                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1996                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1997                                 NUM_BANKS(ADDR_SURF_8_BANK));
1998                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1999                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2000                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2001                                 NUM_BANKS(ADDR_SURF_8_BANK));
2002                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2003                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2004                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2005                                  NUM_BANKS(ADDR_SURF_8_BANK));
2006                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2007                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2008                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2009                                  NUM_BANKS(ADDR_SURF_8_BANK));
2010                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2011                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2012                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2013                                  NUM_BANKS(ADDR_SURF_8_BANK));
2014                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2015                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2016                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2017                                  NUM_BANKS(ADDR_SURF_8_BANK));
2018                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2019                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2020                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2021                                  NUM_BANKS(ADDR_SURF_4_BANK));
2022
2023                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2024                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2025
2026                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2027                         if (reg_offset != 7)
2028                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2029
2030                 break;
2031         case CHIP_TONGA:
2032                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2033                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2034                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2035                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2036                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2037                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2038                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2039                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2040                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2041                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2042                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2043                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2044                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2045                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2046                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2047                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2048                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2049                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2050                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2051                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2052                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2053                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2054                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2055                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2056                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2057                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2058                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2059                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2060                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2062                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2063                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2064                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2065                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2066                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2067                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2068                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2069                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2070                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2071                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2072                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2073                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2074                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2075                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2076                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2077                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2078                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2079                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2080                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2081                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2082                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2083                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2084                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2085                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2087                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2088                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2089                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2091                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2092                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2093                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2094                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2095                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2096                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2097                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2098                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2099                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2100                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2101                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2102                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2103                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2104                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2105                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2106                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2107                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2108                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2109                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2110                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2111                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2112                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2113                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2114                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2115                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2116                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2117                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2118                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2119                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2120                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2121                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2122                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2123                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2124                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2125                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2126                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2127                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2128                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2129                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2130                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2131                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2132                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2133                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2134                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2135                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2136                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2137                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2138                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2139                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2140                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2141                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2142                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2143                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2144                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2145                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2146                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2147                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2148                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2149                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2150                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2151                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2152                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2153                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2154
2155                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2157                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2158                                 NUM_BANKS(ADDR_SURF_16_BANK));
2159                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2160                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2161                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2162                                 NUM_BANKS(ADDR_SURF_16_BANK));
2163                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2164                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2165                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2166                                 NUM_BANKS(ADDR_SURF_16_BANK));
2167                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2169                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2170                                 NUM_BANKS(ADDR_SURF_16_BANK));
2171                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2172                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2173                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2174                                 NUM_BANKS(ADDR_SURF_16_BANK));
2175                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2176                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2177                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2178                                 NUM_BANKS(ADDR_SURF_16_BANK));
2179                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2182                                 NUM_BANKS(ADDR_SURF_16_BANK));
2183                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2184                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2185                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2186                                 NUM_BANKS(ADDR_SURF_16_BANK));
2187                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2188                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2189                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2190                                 NUM_BANKS(ADDR_SURF_16_BANK));
2191                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2193                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2194                                  NUM_BANKS(ADDR_SURF_16_BANK));
2195                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2196                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2197                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2198                                  NUM_BANKS(ADDR_SURF_16_BANK));
2199                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2200                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2201                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2202                                  NUM_BANKS(ADDR_SURF_8_BANK));
2203                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2205                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2206                                  NUM_BANKS(ADDR_SURF_4_BANK));
2207                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2208                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2209                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2210                                  NUM_BANKS(ADDR_SURF_4_BANK));
2211
2212                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2213                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2214
2215                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2216                         if (reg_offset != 7)
2217                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2218
2219                 break;
2220         case CHIP_STONEY:
2221                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2222                                 PIPE_CONFIG(ADDR_SURF_P2) |
2223                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2224                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2225                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2226                                 PIPE_CONFIG(ADDR_SURF_P2) |
2227                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2228                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2229                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2230                                 PIPE_CONFIG(ADDR_SURF_P2) |
2231                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2232                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2233                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2234                                 PIPE_CONFIG(ADDR_SURF_P2) |
2235                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2236                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2237                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2238                                 PIPE_CONFIG(ADDR_SURF_P2) |
2239                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2240                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2241                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2242                                 PIPE_CONFIG(ADDR_SURF_P2) |
2243                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2244                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2245                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2246                                 PIPE_CONFIG(ADDR_SURF_P2) |
2247                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2248                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2249                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2250                                 PIPE_CONFIG(ADDR_SURF_P2));
2251                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2252                                 PIPE_CONFIG(ADDR_SURF_P2) |
2253                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2254                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2255                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2256                                  PIPE_CONFIG(ADDR_SURF_P2) |
2257                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2258                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2259                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2260                                  PIPE_CONFIG(ADDR_SURF_P2) |
2261                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2262                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2263                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2264                                  PIPE_CONFIG(ADDR_SURF_P2) |
2265                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2266                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2267                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2268                                  PIPE_CONFIG(ADDR_SURF_P2) |
2269                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2270                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2271                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2272                                  PIPE_CONFIG(ADDR_SURF_P2) |
2273                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2274                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2275                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2276                                  PIPE_CONFIG(ADDR_SURF_P2) |
2277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2279                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2280                                  PIPE_CONFIG(ADDR_SURF_P2) |
2281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2283                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2284                                  PIPE_CONFIG(ADDR_SURF_P2) |
2285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2287                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2288                                  PIPE_CONFIG(ADDR_SURF_P2) |
2289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2291                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2292                                  PIPE_CONFIG(ADDR_SURF_P2) |
2293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2295                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2296                                  PIPE_CONFIG(ADDR_SURF_P2) |
2297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2299                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2300                                  PIPE_CONFIG(ADDR_SURF_P2) |
2301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2303                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2304                                  PIPE_CONFIG(ADDR_SURF_P2) |
2305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2307                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2308                                  PIPE_CONFIG(ADDR_SURF_P2) |
2309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2311                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2312                                  PIPE_CONFIG(ADDR_SURF_P2) |
2313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2315                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316                                  PIPE_CONFIG(ADDR_SURF_P2) |
2317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2319                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2320                                  PIPE_CONFIG(ADDR_SURF_P2) |
2321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2323
2324                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2325                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2326                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2327                                 NUM_BANKS(ADDR_SURF_8_BANK));
2328                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2329                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2330                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2331                                 NUM_BANKS(ADDR_SURF_8_BANK));
2332                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2333                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2334                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2335                                 NUM_BANKS(ADDR_SURF_8_BANK));
2336                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2337                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2338                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2339                                 NUM_BANKS(ADDR_SURF_8_BANK));
2340                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2341                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2342                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2343                                 NUM_BANKS(ADDR_SURF_8_BANK));
2344                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2346                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2347                                 NUM_BANKS(ADDR_SURF_8_BANK));
2348                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2349                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2350                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2351                                 NUM_BANKS(ADDR_SURF_8_BANK));
2352                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2353                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2354                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2355                                 NUM_BANKS(ADDR_SURF_16_BANK));
2356                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2357                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2358                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2359                                 NUM_BANKS(ADDR_SURF_16_BANK));
2360                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2361                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2362                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2363                                  NUM_BANKS(ADDR_SURF_16_BANK));
2364                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2365                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2366                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2367                                  NUM_BANKS(ADDR_SURF_16_BANK));
2368                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2370                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2371                                  NUM_BANKS(ADDR_SURF_16_BANK));
2372                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2374                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2375                                  NUM_BANKS(ADDR_SURF_16_BANK));
2376                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2378                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2379                                  NUM_BANKS(ADDR_SURF_8_BANK));
2380
2381                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2382                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2383                             reg_offset != 23)
2384                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2385
2386                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2387                         if (reg_offset != 7)
2388                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2389
2390                 break;
2391         default:
2392                 dev_warn(adev->dev,
2393                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2394                          adev->asic_type);
2395
2396         case CHIP_CARRIZO:
2397                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398                                 PIPE_CONFIG(ADDR_SURF_P2) |
2399                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2400                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2401                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                                 PIPE_CONFIG(ADDR_SURF_P2) |
2403                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2404                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2405                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406                                 PIPE_CONFIG(ADDR_SURF_P2) |
2407                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2408                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2409                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410                                 PIPE_CONFIG(ADDR_SURF_P2) |
2411                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2412                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2413                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2414                                 PIPE_CONFIG(ADDR_SURF_P2) |
2415                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2416                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2417                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2418                                 PIPE_CONFIG(ADDR_SURF_P2) |
2419                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2420                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2421                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422                                 PIPE_CONFIG(ADDR_SURF_P2) |
2423                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2424                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2425                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2426                                 PIPE_CONFIG(ADDR_SURF_P2));
2427                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2428                                 PIPE_CONFIG(ADDR_SURF_P2) |
2429                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2430                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432                                  PIPE_CONFIG(ADDR_SURF_P2) |
2433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2436                                  PIPE_CONFIG(ADDR_SURF_P2) |
2437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2439                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2440                                  PIPE_CONFIG(ADDR_SURF_P2) |
2441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444                                  PIPE_CONFIG(ADDR_SURF_P2) |
2445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2447                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2448                                  PIPE_CONFIG(ADDR_SURF_P2) |
2449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2451                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2452                                  PIPE_CONFIG(ADDR_SURF_P2) |
2453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2455                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2456                                  PIPE_CONFIG(ADDR_SURF_P2) |
2457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2459                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2460                                  PIPE_CONFIG(ADDR_SURF_P2) |
2461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2463                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2464                                  PIPE_CONFIG(ADDR_SURF_P2) |
2465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2467                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2468                                  PIPE_CONFIG(ADDR_SURF_P2) |
2469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2471                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2472                                  PIPE_CONFIG(ADDR_SURF_P2) |
2473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2475                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2476                                  PIPE_CONFIG(ADDR_SURF_P2) |
2477                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2478                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2479                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2480                                  PIPE_CONFIG(ADDR_SURF_P2) |
2481                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2482                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2483                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2484                                  PIPE_CONFIG(ADDR_SURF_P2) |
2485                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2486                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2487                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2488                                  PIPE_CONFIG(ADDR_SURF_P2) |
2489                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2490                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2491                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2492                                  PIPE_CONFIG(ADDR_SURF_P2) |
2493                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2494                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2495                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2496                                  PIPE_CONFIG(ADDR_SURF_P2) |
2497                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2498                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2499
2500                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2502                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2503                                 NUM_BANKS(ADDR_SURF_8_BANK));
2504                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2506                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2507                                 NUM_BANKS(ADDR_SURF_8_BANK));
2508                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2511                                 NUM_BANKS(ADDR_SURF_8_BANK));
2512                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2515                                 NUM_BANKS(ADDR_SURF_8_BANK));
2516                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2519                                 NUM_BANKS(ADDR_SURF_8_BANK));
2520                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2522                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2523                                 NUM_BANKS(ADDR_SURF_8_BANK));
2524                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2526                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2527                                 NUM_BANKS(ADDR_SURF_8_BANK));
2528                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2529                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2530                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2531                                 NUM_BANKS(ADDR_SURF_16_BANK));
2532                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2533                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2534                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2535                                 NUM_BANKS(ADDR_SURF_16_BANK));
2536                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2537                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2538                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2539                                  NUM_BANKS(ADDR_SURF_16_BANK));
2540                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2541                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2542                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2543                                  NUM_BANKS(ADDR_SURF_16_BANK));
2544                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2546                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2547                                  NUM_BANKS(ADDR_SURF_16_BANK));
2548                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2550                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2551                                  NUM_BANKS(ADDR_SURF_16_BANK));
2552                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2554                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2555                                  NUM_BANKS(ADDR_SURF_8_BANK));
2556
2557                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2558                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2559                             reg_offset != 23)
2560                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2561
2562                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2563                         if (reg_offset != 7)
2564                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2565
2566                 break;
2567         }
2568 }
2569
2570 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2571 {
2572         u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2573
2574         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2575                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2576                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2577         } else if (se_num == 0xffffffff) {
2578                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2579                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2580         } else if (sh_num == 0xffffffff) {
2581                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2582                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2583         } else {
2584                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2585                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2586         }
2587         WREG32(mmGRBM_GFX_INDEX, data);
2588 }
2589
/**
 * gfx_v8_0_create_bitmask - build a mask with the low @bit_width bits set
 *
 * @bit_width: number of low-order bits to set
 *
 * Returns (2^bit_width - 1) truncated to 32 bits.  Widths of 32 or more
 * saturate to 0xffffffff; this avoids shifting by an amount that is not
 * smaller than the width of the shifted type, which is undefined in C.
 */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	if (bit_width >= 32)
		return 0xffffffff;

	return (1U << bit_width) - 1;
}
2594
2595 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2596 {
2597         u32 data, mask;
2598
2599         data = RREG32(mmCC_RB_BACKEND_DISABLE);
2600         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2601
2602         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2603         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2604
2605         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
2606                                        adev->gfx.config.max_sh_per_se);
2607
2608         return (~data) & mask;
2609 }
2610
2611 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
2612 {
2613         int i, j;
2614         u32 data;
2615         u32 active_rbs = 0;
2616         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2617                                         adev->gfx.config.max_sh_per_se;
2618
2619         mutex_lock(&adev->grbm_idx_mutex);
2620         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2621                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2622                         gfx_v8_0_select_se_sh(adev, i, j);
2623                         data = gfx_v8_0_get_rb_active_bitmap(adev);
2624                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2625                                                rb_bitmap_width_per_sh);
2626                 }
2627         }
2628         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2629         mutex_unlock(&adev->grbm_idx_mutex);
2630
2631         adev->gfx.config.backend_enable_mask = active_rbs;
2632         adev->gfx.config.num_rbs = hweight32(active_rbs);
2633 }
2634
2635 /**
2636  * gfx_v8_0_init_compute_vmid - gart enable
2637  *
2638  * @rdev: amdgpu_device pointer
2639  *
2640  * Initialize compute vmid sh_mem registers
2641  *
2642  */
2643 #define DEFAULT_SH_MEM_BASES    (0x6000)
2644 #define FIRST_COMPUTE_VMID      (8)
2645 #define LAST_COMPUTE_VMID       (16)
2646 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
2647 {
2648         int i;
2649         uint32_t sh_mem_config;
2650         uint32_t sh_mem_bases;
2651
2652         /*
2653          * Configure apertures:
2654          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2655          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2656          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2657          */
2658         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2659
2660         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
2661                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
2662                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2663                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
2664                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
2665                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
2666
2667         mutex_lock(&adev->srbm_mutex);
2668         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2669                 vi_srbm_select(adev, 0, 0, 0, i);
2670                 /* CP and shaders */
2671                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
2672                 WREG32(mmSH_MEM_APE1_BASE, 1);
2673                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2674                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
2675         }
2676         vi_srbm_select(adev, 0, 0, 0, 0);
2677         mutex_unlock(&adev->srbm_mutex);
2678 }
2679
2680 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2681 {
2682         u32 tmp;
2683         int i;
2684
2685         tmp = RREG32(mmGRBM_CNTL);
2686         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
2687         WREG32(mmGRBM_CNTL, tmp);
2688
2689         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2690         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2691         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2692
2693         gfx_v8_0_tiling_mode_table_init(adev);
2694
2695         gfx_v8_0_setup_rb(adev);
2696
2697         /* XXX SH_MEM regs */
2698         /* where to put LDS, scratch, GPUVM in FSA64 space */
2699         mutex_lock(&adev->srbm_mutex);
2700         for (i = 0; i < 16; i++) {
2701                 vi_srbm_select(adev, 0, 0, 0, i);
2702                 /* CP and shaders */
2703                 if (i == 0) {
2704                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2705                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
2706                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2707                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2708                         WREG32(mmSH_MEM_CONFIG, tmp);
2709                 } else {
2710                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2711                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
2712                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2713                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2714                         WREG32(mmSH_MEM_CONFIG, tmp);
2715                 }
2716
2717                 WREG32(mmSH_MEM_APE1_BASE, 1);
2718                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2719                 WREG32(mmSH_MEM_BASES, 0);
2720         }
2721         vi_srbm_select(adev, 0, 0, 0, 0);
2722         mutex_unlock(&adev->srbm_mutex);
2723
2724         gfx_v8_0_init_compute_vmid(adev);
2725
2726         mutex_lock(&adev->grbm_idx_mutex);
2727         /*
2728          * making sure that the following register writes will be broadcasted
2729          * to all the shaders
2730          */
2731         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2732
2733         WREG32(mmPA_SC_FIFO_SIZE,
2734                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
2735                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2736                    (adev->gfx.config.sc_prim_fifo_size_backend <<
2737                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2738                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
2739                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2740                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
2741                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
2742         mutex_unlock(&adev->grbm_idx_mutex);
2743
2744 }
2745
2746 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2747 {
2748         u32 i, j, k;
2749         u32 mask;
2750
2751         mutex_lock(&adev->grbm_idx_mutex);
2752         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2753                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2754                         gfx_v8_0_select_se_sh(adev, i, j);
2755                         for (k = 0; k < adev->usec_timeout; k++) {
2756                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2757                                         break;
2758                                 udelay(1);
2759                         }
2760                 }
2761         }
2762         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2763         mutex_unlock(&adev->grbm_idx_mutex);
2764
2765         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2766                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2767                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2768                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2769         for (k = 0; k < adev->usec_timeout; k++) {
2770                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2771                         break;
2772                 udelay(1);
2773         }
2774 }
2775
2776 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2777                                                bool enable)
2778 {
2779         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2780
2781         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2782         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2783         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2784         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2785
2786         WREG32(mmCP_INT_CNTL_RING0, tmp);
2787 }
2788
2789 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
2790 {
2791         u32 tmp = RREG32(mmRLC_CNTL);
2792
2793         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2794         WREG32(mmRLC_CNTL, tmp);
2795
2796         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2797
2798         gfx_v8_0_wait_for_rlc_serdes(adev);
2799 }
2800
2801 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
2802 {
2803         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
2804
2805         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2806         WREG32(mmGRBM_SOFT_RESET, tmp);
2807         udelay(50);
2808         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2809         WREG32(mmGRBM_SOFT_RESET, tmp);
2810         udelay(50);
2811 }
2812
2813 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2814 {
2815         u32 tmp = RREG32(mmRLC_CNTL);
2816
2817         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2818         WREG32(mmRLC_CNTL, tmp);
2819
2820         /* carrizo do enable cp interrupt after cp inited */
2821         if (!(adev->flags & AMD_IS_APU))
2822                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
2823
2824         udelay(50);
2825 }
2826
2827 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
2828 {
2829         const struct rlc_firmware_header_v2_0 *hdr;
2830         const __le32 *fw_data;
2831         unsigned i, fw_size;
2832
2833         if (!adev->gfx.rlc_fw)
2834                 return -EINVAL;
2835
2836         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2837         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2838
2839         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2840                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2841         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2842
2843         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
2844         for (i = 0; i < fw_size; i++)
2845                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2846         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2847
2848         return 0;
2849 }
2850
2851 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
2852 {
2853         int r;
2854
2855         gfx_v8_0_rlc_stop(adev);
2856
2857         /* disable CG */
2858         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
2859
2860         /* disable PG */
2861         WREG32(mmRLC_PG_CNTL, 0);
2862
2863         gfx_v8_0_rlc_reset(adev);
2864
2865         if (!adev->pp_enabled) {
2866                 if (!adev->firmware.smu_load) {
2867                         /* legacy rlc firmware loading */
2868                         r = gfx_v8_0_rlc_load_microcode(adev);
2869                         if (r)
2870                                 return r;
2871                 } else {
2872                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
2873                                                         AMDGPU_UCODE_ID_RLC_G);
2874                         if (r)
2875                                 return -EINVAL;
2876                 }
2877         }
2878
2879         gfx_v8_0_rlc_start(adev);
2880
2881         return 0;
2882 }
2883
2884 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2885 {
2886         int i;
2887         u32 tmp = RREG32(mmCP_ME_CNTL);
2888
2889         if (enable) {
2890                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2891                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2892                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2893         } else {
2894                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2895                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2896                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2897                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2898                         adev->gfx.gfx_ring[i].ready = false;
2899         }
2900         WREG32(mmCP_ME_CNTL, tmp);
2901         udelay(50);
2902 }
2903
2904 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2905 {
2906         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2907         const struct gfx_firmware_header_v1_0 *ce_hdr;
2908         const struct gfx_firmware_header_v1_0 *me_hdr;
2909         const __le32 *fw_data;
2910         unsigned i, fw_size;
2911
2912         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2913                 return -EINVAL;
2914
2915         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2916                 adev->gfx.pfp_fw->data;
2917         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2918                 adev->gfx.ce_fw->data;
2919         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2920                 adev->gfx.me_fw->data;
2921
2922         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2923         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2924         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2925
2926         gfx_v8_0_cp_gfx_enable(adev, false);
2927
2928         /* PFP */
2929         fw_data = (const __le32 *)
2930                 (adev->gfx.pfp_fw->data +
2931                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2932         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2933         WREG32(mmCP_PFP_UCODE_ADDR, 0);
2934         for (i = 0; i < fw_size; i++)
2935                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2936         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2937
2938         /* CE */
2939         fw_data = (const __le32 *)
2940                 (adev->gfx.ce_fw->data +
2941                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2942         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2943         WREG32(mmCP_CE_UCODE_ADDR, 0);
2944         for (i = 0; i < fw_size; i++)
2945                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2946         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2947
2948         /* ME */
2949         fw_data = (const __le32 *)
2950                 (adev->gfx.me_fw->data +
2951                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2952         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2953         WREG32(mmCP_ME_RAM_WADDR, 0);
2954         for (i = 0; i < fw_size; i++)
2955                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2956         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2957
2958         return 0;
2959 }
2960
2961 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
2962 {
2963         u32 count = 0;
2964         const struct cs_section_def *sect = NULL;
2965         const struct cs_extent_def *ext = NULL;
2966
2967         /* begin clear state */
2968         count += 2;
2969         /* context control state */
2970         count += 3;
2971
2972         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
2973                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2974                         if (sect->id == SECT_CONTEXT)
2975                                 count += 2 + ext->reg_count;
2976                         else
2977                                 return 0;
2978                 }
2979         }
2980         /* pa_sc_raster_config/pa_sc_raster_config1 */
2981         count += 4;
2982         /* end clear state */
2983         count += 2;
2984         /* clear state */
2985         count += 2;
2986
2987         return count;
2988 }
2989
2990 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
2991 {
2992         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2993         const struct cs_section_def *sect = NULL;
2994         const struct cs_extent_def *ext = NULL;
2995         int r, i;
2996
2997         /* init the CP */
2998         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2999         WREG32(mmCP_ENDIAN_SWAP, 0);
3000         WREG32(mmCP_DEVICE_ID, 1);
3001
3002         gfx_v8_0_cp_gfx_enable(adev, true);
3003
3004         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3005         if (r) {
3006                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3007                 return r;
3008         }
3009
3010         /* clear state buffer */
3011         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3012         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3013
3014         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3015         amdgpu_ring_write(ring, 0x80000000);
3016         amdgpu_ring_write(ring, 0x80000000);
3017
3018         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3019                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3020                         if (sect->id == SECT_CONTEXT) {
3021                                 amdgpu_ring_write(ring,
3022                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3023                                                ext->reg_count));
3024                                 amdgpu_ring_write(ring,
3025                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3026                                 for (i = 0; i < ext->reg_count; i++)
3027                                         amdgpu_ring_write(ring, ext->extent[i]);
3028                         }
3029                 }
3030         }
3031
3032         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3033         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3034         switch (adev->asic_type) {
3035         case CHIP_TONGA:
3036                 amdgpu_ring_write(ring, 0x16000012);
3037                 amdgpu_ring_write(ring, 0x0000002A);
3038                 break;
3039         case CHIP_FIJI:
3040                 amdgpu_ring_write(ring, 0x3a00161a);
3041                 amdgpu_ring_write(ring, 0x0000002e);
3042                 break;
3043         case CHIP_TOPAZ:
3044         case CHIP_CARRIZO:
3045                 amdgpu_ring_write(ring, 0x00000002);
3046                 amdgpu_ring_write(ring, 0x00000000);
3047                 break;
3048         case CHIP_STONEY:
3049                 amdgpu_ring_write(ring, 0x00000000);
3050                 amdgpu_ring_write(ring, 0x00000000);
3051                 break;
3052         default:
3053                 BUG();
3054         }
3055
3056         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3057         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3058
3059         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3060         amdgpu_ring_write(ring, 0);
3061
3062         /* init the CE partitions */
3063         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3064         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3065         amdgpu_ring_write(ring, 0x8000);
3066         amdgpu_ring_write(ring, 0x8000);
3067
3068         amdgpu_ring_commit(ring);
3069
3070         return 0;
3071 }
3072
3073 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
3074 {
3075         struct amdgpu_ring *ring;
3076         u32 tmp;
3077         u32 rb_bufsz;
3078         u64 rb_addr, rptr_addr;
3079         int r;
3080
3081         /* Set the write pointer delay */
3082         WREG32(mmCP_RB_WPTR_DELAY, 0);
3083
3084         /* set the RB to use vmid 0 */
3085         WREG32(mmCP_RB_VMID, 0);
3086
3087         /* Set ring buffer size */
3088         ring = &adev->gfx.gfx_ring[0];
3089         rb_bufsz = order_base_2(ring->ring_size / 8);
3090         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3091         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3092         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3093         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
3094 #ifdef __BIG_ENDIAN
3095         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3096 #endif
3097         WREG32(mmCP_RB0_CNTL, tmp);
3098
3099         /* Initialize the ring buffer's read and write pointers */
3100         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3101         ring->wptr = 0;
3102         WREG32(mmCP_RB0_WPTR, ring->wptr);
3103
3104         /* set the wb address wether it's enabled or not */
3105         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3106         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3107         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
3108
3109         mdelay(1);
3110         WREG32(mmCP_RB0_CNTL, tmp);
3111
3112         rb_addr = ring->gpu_addr >> 8;
3113         WREG32(mmCP_RB0_BASE, rb_addr);
3114         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3115
3116         /* no gfx doorbells on iceland */
3117         if (adev->asic_type != CHIP_TOPAZ) {
3118                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
3119                 if (ring->use_doorbell) {
3120                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3121                                             DOORBELL_OFFSET, ring->doorbell_index);
3122                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3123                                             DOORBELL_EN, 1);
3124                 } else {
3125                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3126                                             DOORBELL_EN, 0);
3127                 }
3128                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
3129
3130                 if (adev->asic_type == CHIP_TONGA) {
3131                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3132                                             DOORBELL_RANGE_LOWER,
3133                                             AMDGPU_DOORBELL_GFX_RING0);
3134                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3135
3136                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
3137                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3138                 }
3139
3140         }
3141
3142         /* start the ring */
3143         gfx_v8_0_cp_gfx_start(adev);
3144         ring->ready = true;
3145         r = amdgpu_ring_test_ring(ring);
3146         if (r) {
3147                 ring->ready = false;
3148                 return r;
3149         }
3150
3151         return 0;
3152 }
3153
3154 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3155 {
3156         int i;
3157
3158         if (enable) {
3159                 WREG32(mmCP_MEC_CNTL, 0);
3160         } else {
3161                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3162                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3163                         adev->gfx.compute_ring[i].ready = false;
3164         }
3165         udelay(50);
3166 }
3167
3168 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3169 {
3170         const struct gfx_firmware_header_v1_0 *mec_hdr;
3171         const __le32 *fw_data;
3172         unsigned i, fw_size;
3173
3174         if (!adev->gfx.mec_fw)
3175                 return -EINVAL;
3176
3177         gfx_v8_0_cp_compute_enable(adev, false);
3178
3179         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3180         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3181
3182         fw_data = (const __le32 *)
3183                 (adev->gfx.mec_fw->data +
3184                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3185         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3186
3187         /* MEC1 */
3188         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3189         for (i = 0; i < fw_size; i++)
3190                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3191         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3192
3193         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3194         if (adev->gfx.mec2_fw) {
3195                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
3196
3197                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3198                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3199
3200                 fw_data = (const __le32 *)
3201                         (adev->gfx.mec2_fw->data +
3202                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3203                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3204
3205                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3206                 for (i = 0; i < fw_size; i++)
3207                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3208                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3209         }
3210
3211         return 0;
3212 }
3213
/*
 * struct vi_mqd - dword layout of the per-ring MQD buffer.
 *
 * gfx_v8_0_cp_compute_resume() creates a buffer object of
 * sizeof(struct vi_mqd) for each compute ring, zeroes it and then fills in
 * members such as header, the compute_static_thread_mgmt_se* masks and the
 * cp_hqd_* values.
 *
 * The first 256 members are 32-bit words annotated with their ordinal
 * (dword index 0-255); reserved_t appends another 256 dwords that are
 * reserved for use by ucode.
 *
 * NOTE(review): "MQD" presumably stands for memory queue descriptor and the
 * member order presumably has to match what the CP firmware expects, so
 * members must not be added, removed or reordered - confirm against the
 * hardware documentation.
 */
3214 struct vi_mqd {
3215         uint32_t header;  /* ordinal0 */
3216         uint32_t compute_dispatch_initiator;  /* ordinal1 */
3217         uint32_t compute_dim_x;  /* ordinal2 */
3218         uint32_t compute_dim_y;  /* ordinal3 */
3219         uint32_t compute_dim_z;  /* ordinal4 */
3220         uint32_t compute_start_x;  /* ordinal5 */
3221         uint32_t compute_start_y;  /* ordinal6 */
3222         uint32_t compute_start_z;  /* ordinal7 */
3223         uint32_t compute_num_thread_x;  /* ordinal8 */
3224         uint32_t compute_num_thread_y;  /* ordinal9 */
3225         uint32_t compute_num_thread_z;  /* ordinal10 */
3226         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
3227         uint32_t compute_perfcount_enable;  /* ordinal12 */
3228         uint32_t compute_pgm_lo;  /* ordinal13 */
3229         uint32_t compute_pgm_hi;  /* ordinal14 */
3230         uint32_t compute_tba_lo;  /* ordinal15 */
3231         uint32_t compute_tba_hi;  /* ordinal16 */
3232         uint32_t compute_tma_lo;  /* ordinal17 */
3233         uint32_t compute_tma_hi;  /* ordinal18 */
3234         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
3235         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
3236         uint32_t compute_vmid;  /* ordinal21 */
3237         uint32_t compute_resource_limits;  /* ordinal22 */
3238         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
3239         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
3240         uint32_t compute_tmpring_size;  /* ordinal25 */
3241         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
3242         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
3243         uint32_t compute_restart_x;  /* ordinal28 */
3244         uint32_t compute_restart_y;  /* ordinal29 */
3245         uint32_t compute_restart_z;  /* ordinal30 */
3246         uint32_t compute_thread_trace_enable;  /* ordinal31 */
3247         uint32_t compute_misc_reserved;  /* ordinal32 */
3248         uint32_t compute_dispatch_id;  /* ordinal33 */
3249         uint32_t compute_threadgroup_id;  /* ordinal34 */
3250         uint32_t compute_relaunch;  /* ordinal35 */
3251         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
3252         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
3253         uint32_t compute_wave_restore_control;  /* ordinal38 */
3254         uint32_t reserved9;  /* ordinal39 */
3255         uint32_t reserved10;  /* ordinal40 */
3256         uint32_t reserved11;  /* ordinal41 */
3257         uint32_t reserved12;  /* ordinal42 */
3258         uint32_t reserved13;  /* ordinal43 */
3259         uint32_t reserved14;  /* ordinal44 */
3260         uint32_t reserved15;  /* ordinal45 */
3261         uint32_t reserved16;  /* ordinal46 */
3262         uint32_t reserved17;  /* ordinal47 */
3263         uint32_t reserved18;  /* ordinal48 */
3264         uint32_t reserved19;  /* ordinal49 */
3265         uint32_t reserved20;  /* ordinal50 */
3266         uint32_t reserved21;  /* ordinal51 */
3267         uint32_t reserved22;  /* ordinal52 */
3268         uint32_t reserved23;  /* ordinal53 */
3269         uint32_t reserved24;  /* ordinal54 */
3270         uint32_t reserved25;  /* ordinal55 */
3271         uint32_t reserved26;  /* ordinal56 */
3272         uint32_t reserved27;  /* ordinal57 */
3273         uint32_t reserved28;  /* ordinal58 */
3274         uint32_t reserved29;  /* ordinal59 */
3275         uint32_t reserved30;  /* ordinal60 */
3276         uint32_t reserved31;  /* ordinal61 */
3277         uint32_t reserved32;  /* ordinal62 */
3278         uint32_t reserved33;  /* ordinal63 */
3279         uint32_t reserved34;  /* ordinal64 */
3280         uint32_t compute_user_data_0;  /* ordinal65 */
3281         uint32_t compute_user_data_1;  /* ordinal66 */
3282         uint32_t compute_user_data_2;  /* ordinal67 */
3283         uint32_t compute_user_data_3;  /* ordinal68 */
3284         uint32_t compute_user_data_4;  /* ordinal69 */
3285         uint32_t compute_user_data_5;  /* ordinal70 */
3286         uint32_t compute_user_data_6;  /* ordinal71 */
3287         uint32_t compute_user_data_7;  /* ordinal72 */
3288         uint32_t compute_user_data_8;  /* ordinal73 */
3289         uint32_t compute_user_data_9;  /* ordinal74 */
3290         uint32_t compute_user_data_10;  /* ordinal75 */
3291         uint32_t compute_user_data_11;  /* ordinal76 */
3292         uint32_t compute_user_data_12;  /* ordinal77 */
3293         uint32_t compute_user_data_13;  /* ordinal78 */
3294         uint32_t compute_user_data_14;  /* ordinal79 */
3295         uint32_t compute_user_data_15;  /* ordinal80 */
3296         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
3297         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
3298         uint32_t reserved35;  /* ordinal83 */
3299         uint32_t reserved36;  /* ordinal84 */
3300         uint32_t reserved37;  /* ordinal85 */
3301         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
3302         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
3303         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
3304         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
3305         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
3306         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
3307         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
3308         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
3309         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
3310         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
3311         uint32_t reserved38;  /* ordinal96 */
3312         uint32_t reserved39;  /* ordinal97 */
3313         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
3314         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
3315         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
3316         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
3317         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
3318         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
3319         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
3320         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
3321         uint32_t reserved40;  /* ordinal106 */
3322         uint32_t reserved41;  /* ordinal107 */
3323         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
3324         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
3325         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
3326         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
3327         uint32_t reserved42;  /* ordinal112 */
3328         uint32_t reserved43;  /* ordinal113 */
3329         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
3330         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
3331         uint32_t cp_packet_id_lo;  /* ordinal116 */
3332         uint32_t cp_packet_id_hi;  /* ordinal117 */
3333         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
3334         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
3335         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
3336         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
3337         uint32_t gds_save_mask_lo;  /* ordinal122 */
3338         uint32_t gds_save_mask_hi;  /* ordinal123 */
3339         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
3340         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
3341         uint32_t reserved44;  /* ordinal126 */
3342         uint32_t reserved45;  /* ordinal127 */
3343         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
3344         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
3345         uint32_t cp_hqd_active;  /* ordinal130 */
3346         uint32_t cp_hqd_vmid;  /* ordinal131 */
3347         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
3348         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
3349         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
3350         uint32_t cp_hqd_quantum;  /* ordinal135 */
3351         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
3352         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
3353         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
3354         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
3355         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
3356         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
3357         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
3358         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
3359         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
3360         uint32_t cp_hqd_pq_control;  /* ordinal145 */
3361         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
3362         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
3363         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
3364         uint32_t cp_hqd_ib_control;  /* ordinal149 */
3365         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
3366         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
3367         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
3368         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
3369         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
3370         uint32_t cp_hqd_msg_type;  /* ordinal155 */
3371         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
3372         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
3373         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
3374         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
3375         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
3376         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
3377         uint32_t cp_mqd_control;  /* ordinal162 */
3378         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
3379         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
3380         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
3381         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
3382         uint32_t cp_hqd_eop_control;  /* ordinal167 */
3383         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
3384         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
3385         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
3386         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
3387         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
3388         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
3389         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
3390         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
3391         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
3392         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
3393         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
3394         uint32_t cp_hqd_error;  /* ordinal179 */
3395         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
3396         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
3397         uint32_t reserved46;  /* ordinal182 */
3398         uint32_t reserved47;  /* ordinal183 */
3399         uint32_t reserved48;  /* ordinal184 */
3400         uint32_t reserved49;  /* ordinal185 */
3401         uint32_t reserved50;  /* ordinal186 */
3402         uint32_t reserved51;  /* ordinal187 */
3403         uint32_t reserved52;  /* ordinal188 */
3404         uint32_t reserved53;  /* ordinal189 */
3405         uint32_t reserved54;  /* ordinal190 */
3406         uint32_t reserved55;  /* ordinal191 */
3407         uint32_t iqtimer_pkt_header;  /* ordinal192 */
3408         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
3409         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
3410         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
3411         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
3412         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
3413         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
3414         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
3415         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
3416         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
3417         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
3418         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
3419         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
3420         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
3421         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
3422         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
3423         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
3424         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
3425         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
3426         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
3427         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
3428         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
3429         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
3430         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
3431         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
3432         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
3433         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
3434         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
3435         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
3436         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
3437         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
3438         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
3439         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
3440         uint32_t reserved56;  /* ordinal225 */
3441         uint32_t reserved57;  /* ordinal226 */
3442         uint32_t reserved58;  /* ordinal227 */
3443         uint32_t set_resources_header;  /* ordinal228 */
3444         uint32_t set_resources_dw1;  /* ordinal229 */
3445         uint32_t set_resources_dw2;  /* ordinal230 */
3446         uint32_t set_resources_dw3;  /* ordinal231 */
3447         uint32_t set_resources_dw4;  /* ordinal232 */
3448         uint32_t set_resources_dw5;  /* ordinal233 */
3449         uint32_t set_resources_dw6;  /* ordinal234 */
3450         uint32_t set_resources_dw7;  /* ordinal235 */
3451         uint32_t reserved59;  /* ordinal236 */
3452         uint32_t reserved60;  /* ordinal237 */
3453         uint32_t reserved61;  /* ordinal238 */
3454         uint32_t reserved62;  /* ordinal239 */
3455         uint32_t reserved63;  /* ordinal240 */
3456         uint32_t reserved64;  /* ordinal241 */
3457         uint32_t reserved65;  /* ordinal242 */
3458         uint32_t reserved66;  /* ordinal243 */
3459         uint32_t reserved67;  /* ordinal244 */
3460         uint32_t reserved68;  /* ordinal245 */
3461         uint32_t reserved69;  /* ordinal246 */
3462         uint32_t reserved70;  /* ordinal247 */
3463         uint32_t reserved71;  /* ordinal248 */
3464         uint32_t reserved72;  /* ordinal249 */
3465         uint32_t reserved73;  /* ordinal250 */
3466         uint32_t reserved74;  /* ordinal251 */
3467         uint32_t reserved75;  /* ordinal252 */
3468         uint32_t reserved76;  /* ordinal253 */
3469         uint32_t reserved77;  /* ordinal254 */
3470         uint32_t reserved78;  /* ordinal255 */
3471
3472         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
3473 };
3474
3475 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3476 {
3477         int i, r;
3478
3479         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3480                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3481
3482                 if (ring->mqd_obj) {
3483                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3484                         if (unlikely(r != 0))
3485                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3486
3487                         amdgpu_bo_unpin(ring->mqd_obj);
3488                         amdgpu_bo_unreserve(ring->mqd_obj);
3489
3490                         amdgpu_bo_unref(&ring->mqd_obj);
3491                         ring->mqd_obj = NULL;
3492                 }
3493         }
3494 }
3495
3496 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
3497 {
3498         int r, i, j;
3499         u32 tmp;
3500         bool use_doorbell = true;
3501         u64 hqd_gpu_addr;
3502         u64 mqd_gpu_addr;
3503         u64 eop_gpu_addr;
3504         u64 wb_gpu_addr;
3505         u32 *buf;
3506         struct vi_mqd *mqd;
3507
3508         /* init the pipes */
3509         mutex_lock(&adev->srbm_mutex);
3510         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
3511                 int me = (i < 4) ? 1 : 2;
3512                 int pipe = (i < 4) ? i : (i - 4);
3513
3514                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
3515                 eop_gpu_addr >>= 8;
3516
3517                 vi_srbm_select(adev, me, pipe, 0, 0);
3518
3519                 /* write the EOP addr */
3520                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
3521                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
3522
3523                 /* set the VMID assigned */
3524                 WREG32(mmCP_HQD_VMID, 0);
3525
3526                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3527                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
3528                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3529                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
3530                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
3531         }
3532         vi_srbm_select(adev, 0, 0, 0, 0);
3533         mutex_unlock(&adev->srbm_mutex);
3534
3535         /* init the queues.  Just two for now. */
3536         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3537                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3538
3539                 if (ring->mqd_obj == NULL) {
3540                         r = amdgpu_bo_create(adev,
3541                                              sizeof(struct vi_mqd),
3542                                              PAGE_SIZE, true,
3543                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
3544                                              NULL, &ring->mqd_obj);
3545                         if (r) {
3546                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3547                                 return r;
3548                         }
3549                 }
3550
3551                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3552                 if (unlikely(r != 0)) {
3553                         gfx_v8_0_cp_compute_fini(adev);
3554                         return r;
3555                 }
3556                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3557                                   &mqd_gpu_addr);
3558                 if (r) {
3559                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3560                         gfx_v8_0_cp_compute_fini(adev);
3561                         return r;
3562                 }
3563                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3564                 if (r) {
3565                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3566                         gfx_v8_0_cp_compute_fini(adev);
3567                         return r;
3568                 }
3569
3570                 /* init the mqd struct */
3571                 memset(buf, 0, sizeof(struct vi_mqd));
3572
3573                 mqd = (struct vi_mqd *)buf;
3574                 mqd->header = 0xC0310800;
3575                 mqd->compute_pipelinestat_enable = 0x00000001;
3576                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3577                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3578                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3579                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3580                 mqd->compute_misc_reserved = 0x00000003;
3581
3582                 mutex_lock(&adev->srbm_mutex);
3583                 vi_srbm_select(adev, ring->me,
3584                                ring->pipe,
3585                                ring->queue, 0);
3586
3587                 /* disable wptr polling */
3588                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3589                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3590                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3591
3592                 mqd->cp_hqd_eop_base_addr_lo =
3593                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
3594                 mqd->cp_hqd_eop_base_addr_hi =
3595                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
3596
3597                 /* enable doorbell? */
3598                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3599                 if (use_doorbell) {
3600                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3601                 } else {
3602                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
3603                 }
3604                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
3605                 mqd->cp_hqd_pq_doorbell_control = tmp;
3606
3607                 /* disable the queue if it's active */
3608                 mqd->cp_hqd_dequeue_request = 0;
3609                 mqd->cp_hqd_pq_rptr = 0;
3610                 mqd->cp_hqd_pq_wptr= 0;
3611                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
3612                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
3613                         for (j = 0; j < adev->usec_timeout; j++) {
3614                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
3615                                         break;
3616                                 udelay(1);
3617                         }
3618                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
3619                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
3620                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3621                 }
3622
3623                 /* set the pointer to the MQD */
3624                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
3625                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3626                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
3627                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3628
3629                 /* set MQD vmid to 0 */
3630                 tmp = RREG32(mmCP_MQD_CONTROL);
3631                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3632                 WREG32(mmCP_MQD_CONTROL, tmp);
3633                 mqd->cp_mqd_control = tmp;
3634
3635                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3636                 hqd_gpu_addr = ring->gpu_addr >> 8;
3637                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3638                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3639                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
3640                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
3641
3642                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3643                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
3644                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3645                                     (order_base_2(ring->ring_size / 4) - 1));
3646                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3647                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3648 #ifdef __BIG_ENDIAN
3649                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3650 #endif
3651                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3652                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3653                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3654                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3655                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
3656                 mqd->cp_hqd_pq_control = tmp;
3657
3658                 /* set the wb address wether it's enabled or not */
3659                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3660                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3661                 mqd->cp_hqd_pq_rptr_report_addr_hi =
3662                         upper_32_bits(wb_gpu_addr) & 0xffff;
3663                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3664                        mqd->cp_hqd_pq_rptr_report_addr_lo);
3665                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3666                        mqd->cp_hqd_pq_rptr_report_addr_hi);
3667
3668                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3669                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3670                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3671                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3672                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
3673                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3674                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
3675
3676                 /* enable the doorbell if requested */
3677                 if (use_doorbell) {
3678                         if ((adev->asic_type == CHIP_CARRIZO) ||
3679                             (adev->asic_type == CHIP_FIJI) ||
3680                             (adev->asic_type == CHIP_STONEY)) {
3681                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
3682                                        AMDGPU_DOORBELL_KIQ << 2);
3683                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
3684                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
3685                         }
3686                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3687                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3688                                             DOORBELL_OFFSET, ring->doorbell_index);
3689                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3690                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
3691                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
3692                         mqd->cp_hqd_pq_doorbell_control = tmp;
3693
3694                 } else {
3695                         mqd->cp_hqd_pq_doorbell_control = 0;
3696                 }
3697                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3698                        mqd->cp_hqd_pq_doorbell_control);
3699
3700                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3701                 ring->wptr = 0;
3702                 mqd->cp_hqd_pq_wptr = ring->wptr;
3703                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3704                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3705
3706                 /* set the vmid for the queue */
3707                 mqd->cp_hqd_vmid = 0;
3708                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3709
3710                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
3711                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3712                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
3713                 mqd->cp_hqd_persistent_state = tmp;
3714                 if (adev->asic_type == CHIP_STONEY) {
3715                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
3716                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
3717                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
3718                 }
3719
3720                 /* activate the queue */
3721                 mqd->cp_hqd_active = 1;
3722                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
3723
3724                 vi_srbm_select(adev, 0, 0, 0, 0);
3725                 mutex_unlock(&adev->srbm_mutex);
3726
3727                 amdgpu_bo_kunmap(ring->mqd_obj);
3728                 amdgpu_bo_unreserve(ring->mqd_obj);
3729         }
3730
3731         if (use_doorbell) {
3732                 tmp = RREG32(mmCP_PQ_STATUS);
3733                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3734                 WREG32(mmCP_PQ_STATUS, tmp);
3735         }
3736
3737         gfx_v8_0_cp_compute_enable(adev, true);
3738
3739         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3740                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3741
3742                 ring->ready = true;
3743                 r = amdgpu_ring_test_ring(ring);
3744                 if (r)
3745                         ring->ready = false;
3746         }
3747
3748         return 0;
3749 }
3750
3751 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3752 {
3753         int r;
3754
3755         if (!(adev->flags & AMD_IS_APU))
3756                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3757
3758         if (!adev->pp_enabled) {
3759                 if (!adev->firmware.smu_load) {
3760                         /* legacy firmware loading */
3761                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
3762                         if (r)
3763                                 return r;
3764
3765                         r = gfx_v8_0_cp_compute_load_microcode(adev);
3766                         if (r)
3767                                 return r;
3768                 } else {
3769                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3770                                                         AMDGPU_UCODE_ID_CP_CE);
3771                         if (r)
3772                                 return -EINVAL;
3773
3774                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3775                                                         AMDGPU_UCODE_ID_CP_PFP);
3776                         if (r)
3777                                 return -EINVAL;
3778
3779                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3780                                                         AMDGPU_UCODE_ID_CP_ME);
3781                         if (r)
3782                                 return -EINVAL;
3783
3784                         if (adev->asic_type == CHIP_TOPAZ) {
3785                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
3786                                 if (r)
3787                                         return r;
3788                         } else {
3789                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3790                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
3791                                 if (r)
3792                                         return -EINVAL;
3793                         }
3794                 }
3795         }
3796
3797         r = gfx_v8_0_cp_gfx_resume(adev);
3798         if (r)
3799                 return r;
3800
3801         r = gfx_v8_0_cp_compute_resume(adev);
3802         if (r)
3803                 return r;
3804
3805         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3806
3807         return 0;
3808 }
3809
3810 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
3811 {
3812         gfx_v8_0_cp_gfx_enable(adev, enable);
3813         gfx_v8_0_cp_compute_enable(adev, enable);
3814 }
3815
/*
 * gfx_v8_0_hw_init - bring up the GFX block hardware
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Programs the golden registers, runs the GPU init sequence, then resumes
 * the RLC followed by the CP.
 *
 * Returns 0 on success or the error from the RLC/CP resume step.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int ret;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	ret = gfx_v8_0_rlc_resume(adev);
	if (ret)
		return ret;

	return gfx_v8_0_cp_resume(adev);
}
3835
3836 static int gfx_v8_0_hw_fini(void *handle)
3837 {
3838         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3839
3840         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3841         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3842         gfx_v8_0_cp_enable(adev, false);
3843         gfx_v8_0_rlc_stop(adev);
3844         gfx_v8_0_cp_compute_fini(adev);
3845
3846         return 0;
3847 }
3848
/*
 * gfx_v8_0_suspend - suspend callback for the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Suspend is implemented as a plain hardware teardown.
 *
 * Returns the result of gfx_v8_0_hw_fini().
 */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
3855
/*
 * gfx_v8_0_resume - resume callback for the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Resume is implemented as a full hardware init.
 *
 * Returns the result of gfx_v8_0_hw_init().
 */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
3862
3863 static bool gfx_v8_0_is_idle(void *handle)
3864 {
3865         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3866
3867         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3868                 return false;
3869         else
3870                 return true;
3871 }
3872
3873 static int gfx_v8_0_wait_for_idle(void *handle)
3874 {
3875         unsigned i;
3876         u32 tmp;
3877         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3878
3879         for (i = 0; i < adev->usec_timeout; i++) {
3880                 /* read MC_STATUS */
3881                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3882
3883                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3884                         return 0;
3885                 udelay(1);
3886         }
3887         return -ETIMEDOUT;
3888 }
3889
3890 static void gfx_v8_0_print_status(void *handle)
3891 {
3892         int i;
3893         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3894
3895         dev_info(adev->dev, "GFX 8.x registers\n");
3896         dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
3897                  RREG32(mmGRBM_STATUS));
3898         dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
3899                  RREG32(mmGRBM_STATUS2));
3900         dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3901                  RREG32(mmGRBM_STATUS_SE0));
3902         dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3903                  RREG32(mmGRBM_STATUS_SE1));
3904         dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3905                  RREG32(mmGRBM_STATUS_SE2));
3906         dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3907                  RREG32(mmGRBM_STATUS_SE3));
3908         dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3909         dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3910                  RREG32(mmCP_STALLED_STAT1));
3911         dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3912                  RREG32(mmCP_STALLED_STAT2));
3913         dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3914                  RREG32(mmCP_STALLED_STAT3));
3915         dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3916                  RREG32(mmCP_CPF_BUSY_STAT));
3917         dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3918                  RREG32(mmCP_CPF_STALLED_STAT1));
3919         dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3920         dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3921         dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3922                  RREG32(mmCP_CPC_STALLED_STAT1));
3923         dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
3924
3925         for (i = 0; i < 32; i++) {
3926                 dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
3927                          i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3928         }
3929         for (i = 0; i < 16; i++) {
3930                 dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
3931                          i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
3932         }
3933         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3934                 dev_info(adev->dev, "  se: %d\n", i);
3935                 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3936                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
3937                          RREG32(mmPA_SC_RASTER_CONFIG));
3938                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
3939                          RREG32(mmPA_SC_RASTER_CONFIG_1));
3940         }
3941         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3942
3943         dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
3944                  RREG32(mmGB_ADDR_CONFIG));
3945         dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
3946                  RREG32(mmHDP_ADDR_CONFIG));
3947         dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
3948                  RREG32(mmDMIF_ADDR_CALC));
3949
3950         dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
3951                  RREG32(mmCP_MEQ_THRESHOLDS));
3952         dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
3953                  RREG32(mmSX_DEBUG_1));
3954         dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
3955                  RREG32(mmTA_CNTL_AUX));
3956         dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
3957                  RREG32(mmSPI_CONFIG_CNTL));
3958         dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
3959                  RREG32(mmSQ_CONFIG));
3960         dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
3961                  RREG32(mmDB_DEBUG));
3962         dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
3963                  RREG32(mmDB_DEBUG2));
3964         dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
3965                  RREG32(mmDB_DEBUG3));
3966         dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
3967                  RREG32(mmCB_HW_CONTROL));
3968         dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
3969                  RREG32(mmSPI_CONFIG_CNTL_1));
3970         dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
3971                  RREG32(mmPA_SC_FIFO_SIZE));
3972         dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
3973                  RREG32(mmVGT_NUM_INSTANCES));
3974         dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
3975                  RREG32(mmCP_PERFMON_CNTL));
3976         dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
3977                  RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
3978         dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
3979                  RREG32(mmVGT_CACHE_INVALIDATION));
3980         dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
3981                  RREG32(mmVGT_GS_VERTEX_REUSE));
3982         dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
3983                  RREG32(mmPA_SC_LINE_STIPPLE_STATE));
3984         dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
3985                  RREG32(mmPA_CL_ENHANCE));
3986         dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
3987                  RREG32(mmPA_SC_ENHANCE));
3988
3989         dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
3990                  RREG32(mmCP_ME_CNTL));
3991         dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
3992                  RREG32(mmCP_MAX_CONTEXT));
3993         dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
3994                  RREG32(mmCP_ENDIAN_SWAP));
3995         dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
3996                  RREG32(mmCP_DEVICE_ID));
3997
3998         dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
3999                  RREG32(mmCP_SEM_WAIT_TIMER));
4000
4001         dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
4002                  RREG32(mmCP_RB_WPTR_DELAY));
4003         dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
4004                  RREG32(mmCP_RB_VMID));
4005         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4006                  RREG32(mmCP_RB0_CNTL));
4007         dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
4008                  RREG32(mmCP_RB0_WPTR));
4009         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
4010                  RREG32(mmCP_RB0_RPTR_ADDR));
4011         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4012                  RREG32(mmCP_RB0_RPTR_ADDR_HI));
4013         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4014                  RREG32(mmCP_RB0_CNTL));
4015         dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
4016                  RREG32(mmCP_RB0_BASE));
4017         dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
4018                  RREG32(mmCP_RB0_BASE_HI));
4019         dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
4020                  RREG32(mmCP_MEC_CNTL));
4021         dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
4022                  RREG32(mmCP_CPF_DEBUG));
4023
4024         dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
4025                  RREG32(mmSCRATCH_ADDR));
4026         dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
4027                  RREG32(mmSCRATCH_UMSK));
4028
4029         dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
4030                  RREG32(mmCP_INT_CNTL_RING0));
4031         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4032                  RREG32(mmRLC_LB_CNTL));
4033         dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
4034                  RREG32(mmRLC_CNTL));
4035         dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
4036                  RREG32(mmRLC_CGCG_CGLS_CTRL));
4037         dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
4038                  RREG32(mmRLC_LB_CNTR_INIT));
4039         dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
4040                  RREG32(mmRLC_LB_CNTR_MAX));
4041         dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
4042                  RREG32(mmRLC_LB_INIT_CU_MASK));
4043         dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
4044                  RREG32(mmRLC_LB_PARAMS));
4045         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4046                  RREG32(mmRLC_LB_CNTL));
4047         dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
4048                  RREG32(mmRLC_MC_CNTL));
4049         dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
4050                  RREG32(mmRLC_UCODE_CNTL));
4051
4052         mutex_lock(&adev->srbm_mutex);
4053         for (i = 0; i < 16; i++) {
4054                 vi_srbm_select(adev, 0, 0, 0, i);
4055                 dev_info(adev->dev, "  VM %d:\n", i);
4056                 dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
4057                          RREG32(mmSH_MEM_CONFIG));
4058                 dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
4059                          RREG32(mmSH_MEM_APE1_BASE));
4060                 dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
4061                          RREG32(mmSH_MEM_APE1_LIMIT));
4062                 dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
4063                          RREG32(mmSH_MEM_BASES));
4064         }
4065         vi_srbm_select(adev, 0, 0, 0, 0);
4066         mutex_unlock(&adev->srbm_mutex);
4067 }
4068
4069 static int gfx_v8_0_soft_reset(void *handle)
4070 {
4071         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4072         u32 tmp;
4073         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4074
4075         /* GRBM_STATUS */
4076         tmp = RREG32(mmGRBM_STATUS);
4077         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4078                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4079                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4080                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4081                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4082                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4083                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4084                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4085                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4086                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4087         }
4088
4089         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4090                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4091                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4092                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4093                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4094         }
4095
4096         /* GRBM_STATUS2 */
4097         tmp = RREG32(mmGRBM_STATUS2);
4098         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4099                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4100                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4101
4102         /* SRBM_STATUS */
4103         tmp = RREG32(mmSRBM_STATUS);
4104         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4105                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4106                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4107
4108         if (grbm_soft_reset || srbm_soft_reset) {
4109                 gfx_v8_0_print_status((void *)adev);
4110                 /* stop the rlc */
4111                 gfx_v8_0_rlc_stop(adev);
4112
4113                 /* Disable GFX parsing/prefetching */
4114                 gfx_v8_0_cp_gfx_enable(adev, false);
4115
4116                 /* Disable MEC parsing/prefetching */
4117                 gfx_v8_0_cp_compute_enable(adev, false);
4118
4119                 if (grbm_soft_reset || srbm_soft_reset) {
4120                         tmp = RREG32(mmGMCON_DEBUG);
4121                         tmp = REG_SET_FIELD(tmp,
4122                                             GMCON_DEBUG, GFX_STALL, 1);
4123                         tmp = REG_SET_FIELD(tmp,
4124                                             GMCON_DEBUG, GFX_CLEAR, 1);
4125                         WREG32(mmGMCON_DEBUG, tmp);
4126
4127                         udelay(50);
4128                 }
4129
4130                 if (grbm_soft_reset) {
4131                         tmp = RREG32(mmGRBM_SOFT_RESET);
4132                         tmp |= grbm_soft_reset;
4133                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4134                         WREG32(mmGRBM_SOFT_RESET, tmp);
4135                         tmp = RREG32(mmGRBM_SOFT_RESET);
4136
4137                         udelay(50);
4138
4139                         tmp &= ~grbm_soft_reset;
4140                         WREG32(mmGRBM_SOFT_RESET, tmp);
4141                         tmp = RREG32(mmGRBM_SOFT_RESET);
4142                 }
4143
4144                 if (srbm_soft_reset) {
4145                         tmp = RREG32(mmSRBM_SOFT_RESET);
4146                         tmp |= srbm_soft_reset;
4147                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4148                         WREG32(mmSRBM_SOFT_RESET, tmp);
4149                         tmp = RREG32(mmSRBM_SOFT_RESET);
4150
4151                         udelay(50);
4152
4153                         tmp &= ~srbm_soft_reset;
4154                         WREG32(mmSRBM_SOFT_RESET, tmp);
4155                         tmp = RREG32(mmSRBM_SOFT_RESET);
4156                 }
4157
4158                 if (grbm_soft_reset || srbm_soft_reset) {
4159                         tmp = RREG32(mmGMCON_DEBUG);
4160                         tmp = REG_SET_FIELD(tmp,
4161                                             GMCON_DEBUG, GFX_STALL, 0);
4162                         tmp = REG_SET_FIELD(tmp,
4163                                             GMCON_DEBUG, GFX_CLEAR, 0);
4164                         WREG32(mmGMCON_DEBUG, tmp);
4165                 }
4166
4167                 /* Wait a little for things to settle down */
4168                 udelay(50);
4169                 gfx_v8_0_print_status((void *)adev);
4170         }
4171         return 0;
4172 }
4173
4174 /**
4175  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4176  *
4177  * @adev: amdgpu_device pointer
4178  *
4179  * Fetches a GPU clock counter snapshot.
4180  * Returns the 64 bit clock counter snapshot.
4181  */
4182 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4183 {
4184         uint64_t clock;
4185
4186         mutex_lock(&adev->gfx.gpu_clock_mutex);
4187         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4188         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4189                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4190         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4191         return clock;
4192 }
4193
4194 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4195                                           uint32_t vmid,
4196                                           uint32_t gds_base, uint32_t gds_size,
4197                                           uint32_t gws_base, uint32_t gws_size,
4198                                           uint32_t oa_base, uint32_t oa_size)
4199 {
4200         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4201         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4202
4203         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4204         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4205
4206         oa_base = oa_base >> AMDGPU_OA_SHIFT;
4207         oa_size = oa_size >> AMDGPU_OA_SHIFT;
4208
4209         /* GDS Base */
4210         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4211         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4212                                 WRITE_DATA_DST_SEL(0)));
4213         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4214         amdgpu_ring_write(ring, 0);
4215         amdgpu_ring_write(ring, gds_base);
4216
4217         /* GDS Size */
4218         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4219         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4220                                 WRITE_DATA_DST_SEL(0)));
4221         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4222         amdgpu_ring_write(ring, 0);
4223         amdgpu_ring_write(ring, gds_size);
4224
4225         /* GWS */
4226         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4227         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4228                                 WRITE_DATA_DST_SEL(0)));
4229         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4230         amdgpu_ring_write(ring, 0);
4231         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4232
4233         /* OA */
4234         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4235         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4236                                 WRITE_DATA_DST_SEL(0)));
4237         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4238         amdgpu_ring_write(ring, 0);
4239         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4240 }
4241
4242 static int gfx_v8_0_early_init(void *handle)
4243 {
4244         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4245
4246         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4247         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
4248         gfx_v8_0_set_ring_funcs(adev);
4249         gfx_v8_0_set_irq_funcs(adev);
4250         gfx_v8_0_set_gds_init(adev);
4251
4252         return 0;
4253 }
4254
4255 static int gfx_v8_0_late_init(void *handle)
4256 {
4257         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4258         int r;
4259
4260         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4261         if (r)
4262                 return r;
4263
4264         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4265         if (r)
4266                 return r;
4267
4268         /* requires IBs so do in late init after IB pool is initialized */
4269         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
4270         if (r)
4271                 return r;
4272
4273         return 0;
4274 }
4275
4276 static int gfx_v8_0_set_powergating_state(void *handle,
4277                                           enum amd_powergating_state state)
4278 {
4279         return 0;
4280 }
4281
4282 static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
4283                 uint32_t reg_addr, uint32_t cmd)
4284 {
4285         uint32_t data;
4286
4287         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4288
4289         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4290         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4291
4292         data = RREG32(mmRLC_SERDES_WR_CTRL);
4293         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
4294                         RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
4295                         RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
4296                         RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
4297                         RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
4298                         RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
4299                         RLC_SERDES_WR_CTRL__POWER_UP_MASK |
4300                         RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
4301                         RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
4302                         RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
4303                         RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
4304         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
4305                         (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
4306                         (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
4307                         (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
4308
4309         WREG32(mmRLC_SERDES_WR_CTRL, data);
4310 }
4311
4312 static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4313                 bool enable)
4314 {
4315         uint32_t temp, data;
4316
4317         /* It is disabled by HW by default */
4318         if (enable) {
4319                 /* 1 - RLC memory Light sleep */
4320                 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
4321                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4322                 if (temp != data)
4323                         WREG32(mmRLC_MEM_SLP_CNTL, data);
4324
4325                 /* 2 - CP memory Light sleep */
4326                 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
4327                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4328                 if (temp != data)
4329                         WREG32(mmCP_MEM_SLP_CNTL, data);
4330
4331                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
4332                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4333                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4334                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4335                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4336                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4337
4338                 if (temp != data)
4339                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4340
4341                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4342                 gfx_v8_0_wait_for_rlc_serdes(adev);
4343
4344                 /* 5 - clear mgcg override */
4345                 fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4346
4347                 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
4348                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4349                 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
4350                 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
4351                 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
4352                 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
4353                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
4354                 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
4355                 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
4356                 if (temp != data)
4357                         WREG32(mmCGTS_SM_CTRL_REG, data);
4358                 udelay(50);
4359
4360                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4361                 gfx_v8_0_wait_for_rlc_serdes(adev);
4362         } else {
4363                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
4364                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4365                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4366                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4367                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4368                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4369                 if (temp != data)
4370                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4371
4372                 /* 2 - disable MGLS in RLC */
4373                 data = RREG32(mmRLC_MEM_SLP_CNTL);
4374                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4375                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4376                         WREG32(mmRLC_MEM_SLP_CNTL, data);
4377                 }
4378
4379                 /* 3 - disable MGLS in CP */
4380                 data = RREG32(mmCP_MEM_SLP_CNTL);
4381                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4382                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4383                         WREG32(mmCP_MEM_SLP_CNTL, data);
4384                 }
4385
4386                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
4387                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4388                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
4389                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
4390                 if (temp != data)
4391                         WREG32(mmCGTS_SM_CTRL_REG, data);
4392
4393                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4394                 gfx_v8_0_wait_for_rlc_serdes(adev);
4395
4396                 /* 6 - set mgcg override */
4397                 fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4398
4399                 udelay(50);
4400
4401                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4402                 gfx_v8_0_wait_for_rlc_serdes(adev);
4403         }
4404 }
4405
4406 static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4407                 bool enable)
4408 {
4409         uint32_t temp, temp1, data, data1;
4410
4411         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
4412
4413         if (enable) {
4414                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
4415                  * Cmp_busy/GFX_Idle interrupts
4416                  */
4417                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4418
4419                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4420                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
4421                 if (temp1 != data1)
4422                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4423
4424                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4425                 gfx_v8_0_wait_for_rlc_serdes(adev);
4426
4427                 /* 3 - clear cgcg override */
4428                 fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4429
4430                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4431                 gfx_v8_0_wait_for_rlc_serdes(adev);
4432
4433                 /* 4 - write cmd to set CGLS */
4434                 fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
4435
4436                 /* 5 - enable cgcg */
4437                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4438
4439                 /* enable cgls*/
4440                 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4441
4442                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4443                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
4444
4445                 if (temp1 != data1)
4446                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4447
4448                 if (temp != data)
4449                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4450         } else {
4451                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
4452                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4453
4454                 /* TEST CGCG */
4455                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4456                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
4457                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
4458                 if (temp1 != data1)
4459                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4460
4461                 /* read gfx register to wake up cgcg */
4462                 RREG32(mmCB_CGTT_SCLK_CTRL);
4463                 RREG32(mmCB_CGTT_SCLK_CTRL);
4464                 RREG32(mmCB_CGTT_SCLK_CTRL);
4465                 RREG32(mmCB_CGTT_SCLK_CTRL);
4466
4467                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4468                 gfx_v8_0_wait_for_rlc_serdes(adev);
4469
4470                 /* write cmd to Set CGCG Overrride */
4471                 fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4472
4473                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4474                 gfx_v8_0_wait_for_rlc_serdes(adev);
4475
4476                 /* write cmd to Clear CGLS */
4477                 fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
4478
4479                 /* disable cgcg, cgls should be disabled too. */
4480                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4481                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4482                 if (temp != data)
4483                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4484         }
4485 }
4486 static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
4487                 bool enable)
4488 {
4489         if (enable) {
4490                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
4491                  * ===  MGCG + MGLS + TS(CG/LS) ===
4492                  */
4493                 fiji_update_medium_grain_clock_gating(adev, enable);
4494                 fiji_update_coarse_grain_clock_gating(adev, enable);
4495         } else {
4496                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
4497                  * ===  CGCG + CGLS ===
4498                  */
4499                 fiji_update_coarse_grain_clock_gating(adev, enable);
4500                 fiji_update_medium_grain_clock_gating(adev, enable);
4501         }
4502         return 0;
4503 }
4504
4505 static int gfx_v8_0_set_clockgating_state(void *handle,
4506                                           enum amd_clockgating_state state)
4507 {
4508         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4509
4510         switch (adev->asic_type) {
4511         case CHIP_FIJI:
4512                 fiji_update_gfx_clock_gating(adev,
4513                                 state == AMD_CG_STATE_GATE ? true : false);
4514                 break;
4515         default:
4516                 break;
4517         }
4518         return 0;
4519 }
4520
4521 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4522 {
4523         u32 rptr;
4524
4525         rptr = ring->adev->wb.wb[ring->rptr_offs];
4526
4527         return rptr;
4528 }
4529
4530 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4531 {
4532         struct amdgpu_device *adev = ring->adev;
4533         u32 wptr;
4534
4535         if (ring->use_doorbell)
4536                 /* XXX check if swapping is necessary on BE */
4537                 wptr = ring->adev->wb.wb[ring->wptr_offs];
4538         else
4539                 wptr = RREG32(mmCP_RB0_WPTR);
4540
4541         return wptr;
4542 }
4543
4544 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4545 {
4546         struct amdgpu_device *adev = ring->adev;
4547
4548         if (ring->use_doorbell) {
4549                 /* XXX check if swapping is necessary on BE */
4550                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
4551                 WDOORBELL32(ring->doorbell_index, ring->wptr);
4552         } else {
4553                 WREG32(mmCP_RB0_WPTR, ring->wptr);
4554                 (void)RREG32(mmCP_RB0_WPTR);
4555         }
4556 }
4557
4558 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4559 {
4560         u32 ref_and_mask, reg_mem_engine;
4561
4562         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
4563                 switch (ring->me) {
4564                 case 1:
4565                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
4566                         break;
4567                 case 2:
4568                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
4569                         break;
4570                 default:
4571                         return;
4572                 }
4573                 reg_mem_engine = 0;
4574         } else {
4575                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
4576                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
4577         }
4578
4579         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4580         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
4581                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
4582                                  reg_mem_engine));
4583         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
4584         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
4585         amdgpu_ring_write(ring, ref_and_mask);
4586         amdgpu_ring_write(ring, ref_and_mask);
4587         amdgpu_ring_write(ring, 0x20); /* poll interval */
4588 }
4589
4590 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
4591 {
4592         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4593         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4594                                  WRITE_DATA_DST_SEL(0) |
4595                                  WR_CONFIRM));
4596         amdgpu_ring_write(ring, mmHDP_DEBUG0);
4597         amdgpu_ring_write(ring, 0);
4598         amdgpu_ring_write(ring, 1);
4599
4600 }
4601
4602 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4603                                   struct amdgpu_ib *ib)
4604 {
4605         bool need_ctx_switch = ring->current_ctx != ib->ctx;
4606         u32 header, control = 0;
4607         u32 next_rptr = ring->wptr + 5;
4608
4609         /* drop the CE preamble IB for the same context */
4610         if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
4611                 return;
4612
4613         if (need_ctx_switch)
4614                 next_rptr += 2;
4615
4616         next_rptr += 4;
4617         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4618         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4619         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4620         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4621         amdgpu_ring_write(ring, next_rptr);
4622
4623         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
4624         if (need_ctx_switch) {
4625                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4626                 amdgpu_ring_write(ring, 0);
4627         }
4628
4629         if (ib->flags & AMDGPU_IB_FLAG_CE)
4630                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4631         else
4632                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4633
4634         control |= ib->length_dw | (ib->vm_id << 24);
4635
4636         amdgpu_ring_write(ring, header);
4637         amdgpu_ring_write(ring,
4638 #ifdef __BIG_ENDIAN
4639                           (2 << 0) |
4640 #endif
4641                           (ib->gpu_addr & 0xFFFFFFFC));
4642         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4643         amdgpu_ring_write(ring, control);
4644 }
4645
4646 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4647                                   struct amdgpu_ib *ib)
4648 {
4649         u32 header, control = 0;
4650         u32 next_rptr = ring->wptr + 5;
4651
4652         control |= INDIRECT_BUFFER_VALID;
4653
4654         next_rptr += 4;
4655         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4656         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4657         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4658         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4659         amdgpu_ring_write(ring, next_rptr);
4660
4661         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4662
4663         control |= ib->length_dw | (ib->vm_id << 24);
4664
4665         amdgpu_ring_write(ring, header);
4666         amdgpu_ring_write(ring,
4667 #ifdef __BIG_ENDIAN
4668                                           (2 << 0) |
4669 #endif
4670                                           (ib->gpu_addr & 0xFFFFFFFC));
4671         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4672         amdgpu_ring_write(ring, control);
4673 }
4674
4675 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
4676                                          u64 seq, unsigned flags)
4677 {
4678         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4679         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4680
4681         /* EVENT_WRITE_EOP - flush caches, send int */
4682         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
4683         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4684                                  EOP_TC_ACTION_EN |
4685                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4686                                  EVENT_INDEX(5)));
4687         amdgpu_ring_write(ring, addr & 0xfffffffc);
4688         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
4689                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4690         amdgpu_ring_write(ring, lower_32_bits(seq));
4691         amdgpu_ring_write(ring, upper_32_bits(seq));
4692
4693 }
4694
4695 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4696 {
4697         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4698         uint32_t seq = ring->fence_drv.sync_seq;
4699         uint64_t addr = ring->fence_drv.gpu_addr;
4700
4701         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4702         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
4703                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
4704                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
4705         amdgpu_ring_write(ring, addr & 0xfffffffc);
4706         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4707         amdgpu_ring_write(ring, seq);
4708         amdgpu_ring_write(ring, 0xffffffff);
4709         amdgpu_ring_write(ring, 4); /* poll interval */
4710
4711         if (usepfp) {
4712                 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
4713                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4714                 amdgpu_ring_write(ring, 0);
4715                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4716                 amdgpu_ring_write(ring, 0);
4717         }
4718 }
4719
4720 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4721                                         unsigned vm_id, uint64_t pd_addr)
4722 {
4723         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4724
4725         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4726         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
4727                                  WRITE_DATA_DST_SEL(0)) |
4728                                  WR_CONFIRM);
4729         if (vm_id < 8) {
4730                 amdgpu_ring_write(ring,
4731                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
4732         } else {
4733                 amdgpu_ring_write(ring,
4734                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
4735         }
4736         amdgpu_ring_write(ring, 0);
4737         amdgpu_ring_write(ring, pd_addr >> 12);
4738
4739         /* bits 0-15 are the VM contexts0-15 */
4740         /* invalidate the cache */
4741         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4742         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4743                                  WRITE_DATA_DST_SEL(0)));
4744         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4745         amdgpu_ring_write(ring, 0);
4746         amdgpu_ring_write(ring, 1 << vm_id);
4747
4748         /* wait for the invalidate to complete */
4749         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4750         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
4751                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
4752                                  WAIT_REG_MEM_ENGINE(0))); /* me */
4753         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4754         amdgpu_ring_write(ring, 0);
4755         amdgpu_ring_write(ring, 0); /* ref */
4756         amdgpu_ring_write(ring, 0); /* mask */
4757         amdgpu_ring_write(ring, 0x20); /* poll interval */
4758
4759         /* compute doesn't have PFP */
4760         if (usepfp) {
4761                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4762                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4763                 amdgpu_ring_write(ring, 0x0);
4764                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4765                 amdgpu_ring_write(ring, 0);
4766                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4767                 amdgpu_ring_write(ring, 0);
4768         }
4769 }
4770
4771 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4772 {
4773         return ring->adev->wb.wb[ring->rptr_offs];
4774 }
4775
4776 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4777 {
4778         return ring->adev->wb.wb[ring->wptr_offs];
4779 }
4780
4781 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4782 {
4783         struct amdgpu_device *adev = ring->adev;
4784
4785         /* XXX check if swapping is necessary on BE */
4786         adev->wb.wb[ring->wptr_offs] = ring->wptr;
4787         WDOORBELL32(ring->doorbell_index, ring->wptr);
4788 }
4789
4790 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
4791                                              u64 addr, u64 seq,
4792                                              unsigned flags)
4793 {
4794         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4795         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4796
4797         /* RELEASE_MEM - flush caches, send int */
4798         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4799         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4800                                  EOP_TC_ACTION_EN |
4801                                  EOP_TC_WB_ACTION_EN |
4802                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4803                                  EVENT_INDEX(5)));
4804         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4805         amdgpu_ring_write(ring, addr & 0xfffffffc);
4806         amdgpu_ring_write(ring, upper_32_bits(addr));
4807         amdgpu_ring_write(ring, lower_32_bits(seq));
4808         amdgpu_ring_write(ring, upper_32_bits(seq));
4809 }
4810
4811 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4812                                                  enum amdgpu_interrupt_state state)
4813 {
4814         u32 cp_int_cntl;
4815
4816         switch (state) {
4817         case AMDGPU_IRQ_STATE_DISABLE:
4818                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4819                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4820                                             TIME_STAMP_INT_ENABLE, 0);
4821                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4822                 break;
4823         case AMDGPU_IRQ_STATE_ENABLE:
4824                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4825                 cp_int_cntl =
4826                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4827                                       TIME_STAMP_INT_ENABLE, 1);
4828                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4829                 break;
4830         default:
4831                 break;
4832         }
4833 }
4834
4835 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4836                                                      int me, int pipe,
4837                                                      enum amdgpu_interrupt_state state)
4838 {
4839         u32 mec_int_cntl, mec_int_cntl_reg;
4840
4841         /*
4842          * amdgpu controls only pipe 0 of MEC1. That's why this function only
4843          * handles the setting of interrupts for this specific pipe. All other
4844          * pipes' interrupts are set by amdkfd.
4845          */
4846
4847         if (me == 1) {
4848                 switch (pipe) {
4849                 case 0:
4850                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4851                         break;
4852                 default:
4853                         DRM_DEBUG("invalid pipe %d\n", pipe);
4854                         return;
4855                 }
4856         } else {
4857                 DRM_DEBUG("invalid me %d\n", me);
4858                 return;
4859         }
4860
4861         switch (state) {
4862         case AMDGPU_IRQ_STATE_DISABLE:
4863                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4864                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4865                                              TIME_STAMP_INT_ENABLE, 0);
4866                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4867                 break;
4868         case AMDGPU_IRQ_STATE_ENABLE:
4869                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4870                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4871                                              TIME_STAMP_INT_ENABLE, 1);
4872                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4873                 break;
4874         default:
4875                 break;
4876         }
4877 }
4878
4879 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4880                                              struct amdgpu_irq_src *source,
4881                                              unsigned type,
4882                                              enum amdgpu_interrupt_state state)
4883 {
4884         u32 cp_int_cntl;
4885
4886         switch (state) {
4887         case AMDGPU_IRQ_STATE_DISABLE:
4888                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4889                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4890                                             PRIV_REG_INT_ENABLE, 0);
4891                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4892                 break;
4893         case AMDGPU_IRQ_STATE_ENABLE:
4894                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4895                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4896                                             PRIV_REG_INT_ENABLE, 1);
4897                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4898                 break;
4899         default:
4900                 break;
4901         }
4902
4903         return 0;
4904 }
4905
4906 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4907                                               struct amdgpu_irq_src *source,
4908                                               unsigned type,
4909                                               enum amdgpu_interrupt_state state)
4910 {
4911         u32 cp_int_cntl;
4912
4913         switch (state) {
4914         case AMDGPU_IRQ_STATE_DISABLE:
4915                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4916                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4917                                             PRIV_INSTR_INT_ENABLE, 0);
4918                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4919                 break;
4920         case AMDGPU_IRQ_STATE_ENABLE:
4921                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4922                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4923                                             PRIV_INSTR_INT_ENABLE, 1);
4924                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4925                 break;
4926         default:
4927                 break;
4928         }
4929
4930         return 0;
4931 }
4932
4933 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4934                                             struct amdgpu_irq_src *src,
4935                                             unsigned type,
4936                                             enum amdgpu_interrupt_state state)
4937 {
4938         switch (type) {
4939         case AMDGPU_CP_IRQ_GFX_EOP:
4940                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
4941                 break;
4942         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4943                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4944                 break;
4945         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4946                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4947                 break;
4948         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4949                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4950                 break;
4951         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4952                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4953                 break;
4954         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4955                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4956                 break;
4957         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4958                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4959                 break;
4960         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4961                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4962                 break;
4963         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4964                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4965                 break;
4966         default:
4967                 break;
4968         }
4969         return 0;
4970 }
4971
4972 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
4973                             struct amdgpu_irq_src *source,
4974                             struct amdgpu_iv_entry *entry)
4975 {
4976         int i;
4977         u8 me_id, pipe_id, queue_id;
4978         struct amdgpu_ring *ring;
4979
4980         DRM_DEBUG("IH: CP EOP\n");
4981         me_id = (entry->ring_id & 0x0c) >> 2;
4982         pipe_id = (entry->ring_id & 0x03) >> 0;
4983         queue_id = (entry->ring_id & 0x70) >> 4;
4984
4985         switch (me_id) {
4986         case 0:
4987                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4988                 break;
4989         case 1:
4990         case 2:
4991                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4992                         ring = &adev->gfx.compute_ring[i];
4993                         /* Per-queue interrupt is supported for MEC starting from VI.
4994                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
4995                           */
4996                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
4997                                 amdgpu_fence_process(ring);
4998                 }
4999                 break;
5000         }
5001         return 0;
5002 }
5003
5004 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
5005                                  struct amdgpu_irq_src *source,
5006                                  struct amdgpu_iv_entry *entry)
5007 {
5008         DRM_ERROR("Illegal register access in command stream\n");
5009         schedule_work(&adev->reset_work);
5010         return 0;
5011 }
5012
5013 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
5014                                   struct amdgpu_irq_src *source,
5015                                   struct amdgpu_iv_entry *entry)
5016 {
5017         DRM_ERROR("Illegal instruction in command stream\n");
5018         schedule_work(&adev->reset_work);
5019         return 0;
5020 }
5021
5022 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
5023         .early_init = gfx_v8_0_early_init,
5024         .late_init = gfx_v8_0_late_init,
5025         .sw_init = gfx_v8_0_sw_init,
5026         .sw_fini = gfx_v8_0_sw_fini,
5027         .hw_init = gfx_v8_0_hw_init,
5028         .hw_fini = gfx_v8_0_hw_fini,
5029         .suspend = gfx_v8_0_suspend,
5030         .resume = gfx_v8_0_resume,
5031         .is_idle = gfx_v8_0_is_idle,
5032         .wait_for_idle = gfx_v8_0_wait_for_idle,
5033         .soft_reset = gfx_v8_0_soft_reset,
5034         .print_status = gfx_v8_0_print_status,
5035         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
5036         .set_powergating_state = gfx_v8_0_set_powergating_state,
5037 };
5038
5039 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5040         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
5041         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
5042         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
5043         .parse_cs = NULL,
5044         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
5045         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
5046         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
5047         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5048         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5049         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5050         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
5051         .test_ring = gfx_v8_0_ring_test_ring,
5052         .test_ib = gfx_v8_0_ring_test_ib,
5053         .insert_nop = amdgpu_ring_insert_nop,
5054         .pad_ib = amdgpu_ring_generic_pad_ib,
5055 };
5056
5057 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5058         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
5059         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
5060         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
5061         .parse_cs = NULL,
5062         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
5063         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
5064         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
5065         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5066         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5067         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5068         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
5069         .test_ring = gfx_v8_0_ring_test_ring,
5070         .test_ib = gfx_v8_0_ring_test_ib,
5071         .insert_nop = amdgpu_ring_insert_nop,
5072         .pad_ib = amdgpu_ring_generic_pad_ib,
5073 };
5074
5075 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5076 {
5077         int i;
5078
5079         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5080                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5081
5082         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5083                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5084 }
5085
5086 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
5087         .set = gfx_v8_0_set_eop_interrupt_state,
5088         .process = gfx_v8_0_eop_irq,
5089 };
5090
5091 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
5092         .set = gfx_v8_0_set_priv_reg_fault_state,
5093         .process = gfx_v8_0_priv_reg_irq,
5094 };
5095
5096 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
5097         .set = gfx_v8_0_set_priv_inst_fault_state,
5098         .process = gfx_v8_0_priv_inst_irq,
5099 };
5100
5101 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5102 {
5103         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5104         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5105
5106         adev->gfx.priv_reg_irq.num_types = 1;
5107         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5108
5109         adev->gfx.priv_inst_irq.num_types = 1;
5110         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
5111 }
5112
5113 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
5114 {
5115         /* init asci gds info */
5116         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5117         adev->gds.gws.total_size = 64;
5118         adev->gds.oa.total_size = 16;
5119
5120         if (adev->gds.mem.total_size == 64 * 1024) {
5121                 adev->gds.mem.gfx_partition_size = 4096;
5122                 adev->gds.mem.cs_partition_size = 4096;
5123
5124                 adev->gds.gws.gfx_partition_size = 4;
5125                 adev->gds.gws.cs_partition_size = 4;
5126
5127                 adev->gds.oa.gfx_partition_size = 4;
5128                 adev->gds.oa.cs_partition_size = 1;
5129         } else {
5130                 adev->gds.mem.gfx_partition_size = 1024;
5131                 adev->gds.mem.cs_partition_size = 1024;
5132
5133                 adev->gds.gws.gfx_partition_size = 16;
5134                 adev->gds.gws.cs_partition_size = 16;
5135
5136                 adev->gds.oa.gfx_partition_size = 4;
5137                 adev->gds.oa.cs_partition_size = 4;
5138         }
5139 }
5140
5141 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5142 {
5143         u32 data, mask;
5144
5145         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5146         data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5147
5148         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5149         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5150
5151         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
5152
5153         return (~data) & mask;
5154 }
5155
5156 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5157                          struct amdgpu_cu_info *cu_info)
5158 {
5159         int i, j, k, counter, active_cu_number = 0;
5160         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5161
5162         if (!adev || !cu_info)
5163                 return -EINVAL;
5164
5165         memset(cu_info, 0, sizeof(*cu_info));
5166
5167         mutex_lock(&adev->grbm_idx_mutex);
5168         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5169                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5170                         mask = 1;
5171                         ao_bitmap = 0;
5172                         counter = 0;
5173                         gfx_v8_0_select_se_sh(adev, i, j);
5174                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
5175                         cu_info->bitmap[i][j] = bitmap;
5176
5177                         for (k = 0; k < 16; k ++) {
5178                                 if (bitmap & mask) {
5179                                         if (counter < 2)
5180                                                 ao_bitmap |= mask;
5181                                         counter ++;
5182                                 }
5183                                 mask <<= 1;
5184                         }
5185                         active_cu_number += counter;
5186                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5187                 }
5188         }
5189         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5190         mutex_unlock(&adev->grbm_idx_mutex);
5191
5192         cu_info->number = active_cu_number;
5193         cu_info->ao_cu_mask = ao_cu_mask;
5194
5195         return 0;
5196 }
This page took 0.364791 seconds and 4 git commands to generate.