/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
23
#include <linux/delay.h>
#include <linux/firmware.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 4096

/* Per-ASIC golden GB_ADDR_CONFIG values (Topaz/Carrizo share one value). */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Helpers that place a field value at its GB_TILE_MODE0/GB_MACROTILE_MODE0
 * bit position (shift amounts come from the gca sh_mask headers).
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
76
/* Bit masks for RLC_CGTT_MGCG_OVERRIDE fields. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES command values (set vs. clear). */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM register addresses */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14
99
100 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
103 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
104 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
105 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
110 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
111 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
116 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
117 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
123 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
124 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
125
126 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
129 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
130 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
132
133 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
144
145 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
153 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
155 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
156
157 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
165 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
166 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
167 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
168
169 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
172 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
173 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
174 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
175
176 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
177 {
178         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
179         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
180         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
181         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
182         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
183         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
184         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
185         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
186         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
187         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
188         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
189         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
190         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
191         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
192         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
193         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
194 };
195
196 static const u32 golden_settings_tonga_a11[] =
197 {
198         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
199         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
200         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
201         mmGB_GPU_ID, 0x0000000f, 0x00000000,
202         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
203         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
204         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
205         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
206         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
207         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
208         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
209         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
210         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
211         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
212         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
213         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
214 };
215
216 static const u32 tonga_golden_common_all[] =
217 {
218         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
219         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
220         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
221         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
222         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
223         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
224         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
225         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
226 };
227
228 static const u32 tonga_mgcg_cgcg_init[] =
229 {
230         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
231         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
232         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
233         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
234         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
236         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
237         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
238         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
239         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
240         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
241         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
242         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
245         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
246         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
247         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
249         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
250         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
251         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
252         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
253         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
254         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
255         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
256         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
257         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
258         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
259         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
260         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
261         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
262         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
263         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
264         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
265         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
266         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
267         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
268         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
269         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
270         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
271         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
272         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
273         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
274         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
275         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
276         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
277         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
278         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
279         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
280         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
281         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
282         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
283         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
284         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
285         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
286         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
287         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
288         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
289         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
290         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
291         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
292         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
293         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
294         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
295         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
296         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
297         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
298         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
299         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
300         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
301         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
302         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
303         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
304         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
305 };
306
307 static const u32 golden_settings_vegam_a11[] =
308 {
309         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
310         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
311         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
312         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
313         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
314         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
315         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
316         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
317         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
318         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
319         mmSQ_CONFIG, 0x07f80000, 0x01180000,
320         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
321         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
322         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
323         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
324         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
325         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
326 };
327
328 static const u32 vegam_golden_common_all[] =
329 {
330         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
331         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
332         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
333         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
334         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
335         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
336 };
337
338 static const u32 golden_settings_polaris11_a11[] =
339 {
340         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
341         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
342         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
343         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
344         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
345         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
346         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
347         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
348         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
349         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
350         mmSQ_CONFIG, 0x07f80000, 0x01180000,
351         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
352         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
353         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
354         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
355         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
356         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
357 };
358
359 static const u32 polaris11_golden_common_all[] =
360 {
361         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
362         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
363         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
364         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
365         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
366         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
367 };
368
369 static const u32 golden_settings_polaris10_a11[] =
370 {
371         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
372         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
373         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
374         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
375         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
376         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
377         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
378         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
379         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
380         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
381         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
382         mmSQ_CONFIG, 0x07f80000, 0x07180000,
383         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
384         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
385         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
386         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
387         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
388 };
389
390 static const u32 polaris10_golden_common_all[] =
391 {
392         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
393         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
394         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
395         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
396         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
397         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
398         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
399         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
400 };
401
402 static const u32 fiji_golden_common_all[] =
403 {
404         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
405         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
406         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
407         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
408         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
409         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
410         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
411         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
412         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
413         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
414 };
415
416 static const u32 golden_settings_fiji_a10[] =
417 {
418         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
419         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
420         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
421         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
422         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
423         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
424         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
425         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
426         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
428         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
429 };
430
431 static const u32 fiji_mgcg_cgcg_init[] =
432 {
433         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
434         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
435         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
436         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
440         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
442         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
444         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
455         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
456         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
458         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
459         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
460         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
461         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
462         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
463         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
464         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
465         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
466         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
467         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
468 };
469
470 static const u32 golden_settings_iceland_a11[] =
471 {
472         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
473         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
474         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
475         mmGB_GPU_ID, 0x0000000f, 0x00000000,
476         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
477         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
478         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
479         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
480         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
481         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
482         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
483         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
484         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
485         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
486         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
487         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
488 };
489
490 static const u32 iceland_golden_common_all[] =
491 {
492         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
493         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
494         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
495         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
496         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
497         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
498         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
499         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
500 };
501
502 static const u32 iceland_mgcg_cgcg_init[] =
503 {
504         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
505         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
506         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
507         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
508         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
509         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
510         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
511         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
512         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
513         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
514         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
515         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
516         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
519         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
520         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
521         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
523         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
524         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
525         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
526         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
527         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
528         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
529         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
530         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
531         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
532         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
533         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
534         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
535         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
536         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
537         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
538         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
539         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
540         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
541         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
542         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
543         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
544         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
545         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
546         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
547         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
548         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
549         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
550         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
551         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
552         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
553         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
554         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
555         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
558         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
561         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
562         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
563         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
564         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
565         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
566         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
567         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
568 };
569
570 static const u32 cz_golden_settings_a11[] =
571 {
572         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
573         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
574         mmGB_GPU_ID, 0x0000000f, 0x00000000,
575         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
576         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
577         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
578         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
579         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
580         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
581         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
582         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
583         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
584 };
585
586 static const u32 cz_golden_common_all[] =
587 {
588         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
589         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
590         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
591         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
592         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
593         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
594         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
595         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
596 };
597
598 static const u32 cz_mgcg_cgcg_init[] =
599 {
600         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
601         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
602         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
606         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
609         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
611         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
615         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
616         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
617         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
619         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
620         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
621         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
622         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
623         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
624         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
625         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
626         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
627         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
628         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
630         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
631         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
632         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
633         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
634         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
635         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
636         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
637         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
638         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
639         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
640         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
641         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
642         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
643         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
644         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
645         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
646         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
647         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
648         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
649         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
650         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
651         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
652         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
653         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
654         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
655         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
656         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
657         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
658         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
659         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
660         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
661         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
662         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
663         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
664         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
665         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
666         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
667         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
668         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
669         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
670         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
671         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
672         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
673         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
674         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
675 };
676
/*
 * Stoney "golden" register overrides.  Flat triples of
 * { register offset, and_mask, or_value } consumed by
 * amdgpu_device_program_register_sequence(); an and_mask of 0xffffffff
 * means the or_value is written verbatim.
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
690
/*
 * Stoney common golden settings (raster config, addressing, SPI CU
 * reservations).  Same { offset, and_mask, or_value } triple layout as
 * the other golden tables.
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
702
/*
 * Stoney medium-grain / coarse-grain clockgating init sequence.
 * Same { offset, and_mask, or_value } triple layout as the other
 * golden tables.
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
711
712
/*
 * Human-readable descriptions of the SQ EDC error sources, used when
 * reporting ECC events.  Presumably indexed by the SOURCE field of the
 * SQ EDC info register — confirm against the interrupt handler that
 * consumes this table.
 */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
722
/* Forward declarations for helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
731
/*
 * gfx_v8_0_init_golden_registers - apply per-ASIC "golden" register settings
 * @adev: amdgpu device pointer
 *
 * Programs the recommended clockgating-init, golden-settings and common
 * register sequences for each supported VI-family ASIC via
 * amdgpu_device_program_register_sequence().  ASICs not listed here are
 * deliberately left untouched.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		/* NOTE(review): SMC-side ACLK setup; meaning of 0x1C is not
		 * visible here — confirm against the SMU register headers. */
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific quirk: three known rev-0xc7 Polaris10 boards
		 * need extra configuration pushed over the atombios i2c bus. */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
828
829 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
830 {
831         adev->gfx.scratch.num_reg = 8;
832         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
833         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
834 }
835
/*
 * gfx_v8_0_ring_test_ring - basic CP ring liveness test
 * @ring: ring to exercise
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a SET_UCONFIG_REG
 * packet asking the CP to overwrite it with 0xDEADBEEF, and polls for
 * up to adev->usec_timeout microseconds for the value to land.
 *
 * Returns 0 on success, -ETIMEDOUT if the CP never wrote the value, or
 * a negative error from scratch/ring allocation.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	/* one SET_UCONFIG_REG write: scratch <- 0xDEADBEEF */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-poll until the CP executes the packet or we time out */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
872
/*
 * gfx_v8_0_ring_test_ib - test indirect buffer submission on a ring
 * @ring: ring to exercise
 * @timeout: fence wait timeout in jiffies
 *
 * Seeds a writeback slot with 0xCAFEDEAD, submits a small IB containing
 * a WRITE_DATA packet that stores 0xDEADBEEF to that slot, then waits
 * on the resulting fence and checks the memory was updated.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signalled,
 * -EINVAL if the write did not land, or another negative error from
 * allocation/submission.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16,
					AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	/* WRITE_DATA of 0xDEADBEEF to the writeback slot, with confirm */
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	/* dma_fence_wait_timeout(): 0 means timeout, <0 means error */
	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
928
929
930 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
931 {
932         release_firmware(adev->gfx.pfp_fw);
933         adev->gfx.pfp_fw = NULL;
934         release_firmware(adev->gfx.me_fw);
935         adev->gfx.me_fw = NULL;
936         release_firmware(adev->gfx.ce_fw);
937         adev->gfx.ce_fw = NULL;
938         release_firmware(adev->gfx.rlc_fw);
939         adev->gfx.rlc_fw = NULL;
940         release_firmware(adev->gfx.mec_fw);
941         adev->gfx.mec_fw = NULL;
942         if ((adev->asic_type != CHIP_STONEY) &&
943             (adev->asic_type != CHIP_TOPAZ))
944                 release_firmware(adev->gfx.mec2_fw);
945         adev->gfx.mec2_fw = NULL;
946
947         kfree(adev->gfx.rlc.register_list_format);
948 }
949
950 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
951 {
952         const char *chip_name;
953         char fw_name[30];
954         int err;
955         struct amdgpu_firmware_info *info = NULL;
956         const struct common_firmware_header *header = NULL;
957         const struct gfx_firmware_header_v1_0 *cp_hdr;
958         const struct rlc_firmware_header_v2_0 *rlc_hdr;
959         unsigned int *tmp = NULL, i;
960
961         DRM_DEBUG("\n");
962
963         switch (adev->asic_type) {
964         case CHIP_TOPAZ:
965                 chip_name = "topaz";
966                 break;
967         case CHIP_TONGA:
968                 chip_name = "tonga";
969                 break;
970         case CHIP_CARRIZO:
971                 chip_name = "carrizo";
972                 break;
973         case CHIP_FIJI:
974                 chip_name = "fiji";
975                 break;
976         case CHIP_STONEY:
977                 chip_name = "stoney";
978                 break;
979         case CHIP_POLARIS10:
980                 chip_name = "polaris10";
981                 break;
982         case CHIP_POLARIS11:
983                 chip_name = "polaris11";
984                 break;
985         case CHIP_POLARIS12:
986                 chip_name = "polaris12";
987                 break;
988         case CHIP_VEGAM:
989                 chip_name = "vegam";
990                 break;
991         default:
992                 BUG();
993         }
994
995         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
996                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
997                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
998                 if (err == -ENOENT) {
999                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1000                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1001                 }
1002         } else {
1003                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1004                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1005         }
1006         if (err)
1007                 goto out;
1008         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1009         if (err)
1010                 goto out;
1011         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1012         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1013         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1014
1015         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1016                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1017                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1018                 if (err == -ENOENT) {
1019                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1020                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1021                 }
1022         } else {
1023                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1024                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1025         }
1026         if (err)
1027                 goto out;
1028         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1029         if (err)
1030                 goto out;
1031         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1032         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1033
1034         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1035
1036         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1037                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1038                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1039                 if (err == -ENOENT) {
1040                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1041                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1042                 }
1043         } else {
1044                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1045                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1046         }
1047         if (err)
1048                 goto out;
1049         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1050         if (err)
1051                 goto out;
1052         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1053         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1054         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1055
1056         /*
1057          * Support for MCBP/Virtualization in combination with chained IBs is
1058          * formal released on feature version #46
1059          */
1060         if (adev->gfx.ce_feature_version >= 46 &&
1061             adev->gfx.pfp_feature_version >= 46) {
1062                 adev->virt.chained_ib_support = true;
1063                 DRM_INFO("Chained IB support enabled!\n");
1064         } else
1065                 adev->virt.chained_ib_support = false;
1066
1067         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1068         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1069         if (err)
1070                 goto out;
1071         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1072         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1073         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1074         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1075
1076         adev->gfx.rlc.save_and_restore_offset =
1077                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1078         adev->gfx.rlc.clear_state_descriptor_offset =
1079                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1080         adev->gfx.rlc.avail_scratch_ram_locations =
1081                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1082         adev->gfx.rlc.reg_restore_list_size =
1083                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1084         adev->gfx.rlc.reg_list_format_start =
1085                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1086         adev->gfx.rlc.reg_list_format_separate_start =
1087                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1088         adev->gfx.rlc.starting_offsets_start =
1089                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1090         adev->gfx.rlc.reg_list_format_size_bytes =
1091                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1092         adev->gfx.rlc.reg_list_size_bytes =
1093                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1094
1095         adev->gfx.rlc.register_list_format =
1096                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1097                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1098
1099         if (!adev->gfx.rlc.register_list_format) {
1100                 err = -ENOMEM;
1101                 goto out;
1102         }
1103
1104         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1105                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1106         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1107                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1108
1109         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1110
1111         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1112                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1113         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1114                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1115
1116         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1117                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1118                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1119                 if (err == -ENOENT) {
1120                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1121                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1122                 }
1123         } else {
1124                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1125                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1126         }
1127         if (err)
1128                 goto out;
1129         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1130         if (err)
1131                 goto out;
1132         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1133         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1134         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1135
1136         if ((adev->asic_type != CHIP_STONEY) &&
1137             (adev->asic_type != CHIP_TOPAZ)) {
1138                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1139                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1140                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1141                         if (err == -ENOENT) {
1142                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1143                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1144                         }
1145                 } else {
1146                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1147                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1148                 }
1149                 if (!err) {
1150                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1151                         if (err)
1152                                 goto out;
1153                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1154                                 adev->gfx.mec2_fw->data;
1155                         adev->gfx.mec2_fw_version =
1156                                 le32_to_cpu(cp_hdr->header.ucode_version);
1157                         adev->gfx.mec2_feature_version =
1158                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1159                 } else {
1160                         err = 0;
1161                         adev->gfx.mec2_fw = NULL;
1162                 }
1163         }
1164
1165         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1166         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1167         info->fw = adev->gfx.pfp_fw;
1168         header = (const struct common_firmware_header *)info->fw->data;
1169         adev->firmware.fw_size +=
1170                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1171
1172         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1173         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1174         info->fw = adev->gfx.me_fw;
1175         header = (const struct common_firmware_header *)info->fw->data;
1176         adev->firmware.fw_size +=
1177                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1178
1179         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1180         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1181         info->fw = adev->gfx.ce_fw;
1182         header = (const struct common_firmware_header *)info->fw->data;
1183         adev->firmware.fw_size +=
1184                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1185
1186         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1187         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1188         info->fw = adev->gfx.rlc_fw;
1189         header = (const struct common_firmware_header *)info->fw->data;
1190         adev->firmware.fw_size +=
1191                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1192
1193         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1194         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1195         info->fw = adev->gfx.mec_fw;
1196         header = (const struct common_firmware_header *)info->fw->data;
1197         adev->firmware.fw_size +=
1198                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1199
1200         /* we need account JT in */
1201         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1202         adev->firmware.fw_size +=
1203                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1204
1205         if (amdgpu_sriov_vf(adev)) {
1206                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1207                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1208                 info->fw = adev->gfx.mec_fw;
1209                 adev->firmware.fw_size +=
1210                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1211         }
1212
1213         if (adev->gfx.mec2_fw) {
1214                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1215                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1216                 info->fw = adev->gfx.mec2_fw;
1217                 header = (const struct common_firmware_header *)info->fw->data;
1218                 adev->firmware.fw_size +=
1219                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1220         }
1221
1222 out:
1223         if (err) {
1224                 dev_err(adev->dev,
1225                         "gfx8: Failed to load firmware \"%s\"\n",
1226                         fw_name);
1227                 release_firmware(adev->gfx.pfp_fw);
1228                 adev->gfx.pfp_fw = NULL;
1229                 release_firmware(adev->gfx.me_fw);
1230                 adev->gfx.me_fw = NULL;
1231                 release_firmware(adev->gfx.ce_fw);
1232                 adev->gfx.ce_fw = NULL;
1233                 release_firmware(adev->gfx.rlc_fw);
1234                 adev->gfx.rlc_fw = NULL;
1235                 release_firmware(adev->gfx.mec_fw);
1236                 adev->gfx.mec_fw = NULL;
1237                 release_firmware(adev->gfx.mec2_fw);
1238                 adev->gfx.mec2_fw = NULL;
1239         }
1240         return err;
1241 }
1242
/*
 * gfx_v8_0_get_csb_buffer - emit the clear-state buffer contents
 * @adev: amdgpu device pointer
 * @buffer: destination for the PM4 command stream (may be in VRAM,
 *          hence volatile)
 *
 * Serialises adev->gfx.rlc.cs_data into a PM4 stream: preamble begin,
 * context control, every SECT_CONTEXT register extent, the raster
 * config for the first RB, preamble end, and a final CLEAR_STATE.
 * Non-context sections abort the dump early — only SECT_CONTEXT data
 * is expected here.  All words are written little-endian.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per register extent */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* PA_SC_RASTER_CONFIG/_1 from the first render backend */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1289
1290 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1291 {
1292         if (adev->asic_type == CHIP_CARRIZO)
1293                 return 5;
1294         else
1295                 return 4;
1296 }
1297
1298 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1299 {
1300         const struct cs_section_def *cs_data;
1301         int r;
1302
1303         adev->gfx.rlc.cs_data = vi_cs_data;
1304
1305         cs_data = adev->gfx.rlc.cs_data;
1306
1307         if (cs_data) {
1308                 /* init clear state block */
1309                 r = amdgpu_gfx_rlc_init_csb(adev);
1310                 if (r)
1311                         return r;
1312         }
1313
1314         if ((adev->asic_type == CHIP_CARRIZO) ||
1315             (adev->asic_type == CHIP_STONEY)) {
1316                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1317                 r = amdgpu_gfx_rlc_init_cpt(adev);
1318                 if (r)
1319                         return r;
1320         }
1321
1322         /* init spm vmid with 0xf */
1323         if (adev->gfx.rlc.funcs->update_spm_vmid)
1324                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1325
1326         return 0;
1327 }
1328
/* Free the MEC HPD (hardware queue descriptor) EOP buffer object. */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1333
1334 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1335 {
1336         int r;
1337         u32 *hpd;
1338         size_t mec_hpd_size;
1339
1340         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1341
1342         /* take ownership of the relevant compute queues */
1343         amdgpu_gfx_compute_queue_acquire(adev);
1344
1345         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1346         if (mec_hpd_size) {
1347                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1348                                               AMDGPU_GEM_DOMAIN_VRAM,
1349                                               &adev->gfx.mec.hpd_eop_obj,
1350                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1351                                               (void **)&hpd);
1352                 if (r) {
1353                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1354                         return r;
1355                 }
1356
1357                 memset(hpd, 0, mec_hpd_size);
1358
1359                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1360                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1361         }
1362
1363         return 0;
1364 }
1365
/*
 * Raw GCN shader binary dispatched by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize the VGPR file.  Do not edit the encodings.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1402
/*
 * Raw GCN shader binary dispatched (twice, with different SE thread
 * management masks) by gfx_v8_0_do_edc_gpr_workarounds() to initialize
 * the SGPR file.  Do not edit the encodings.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1427
/*
 * (register, value) pairs written via SET_SH_REG before dispatching the
 * VGPR init shader in gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1448
/*
 * (register, value) pairs for the first SGPR init dispatch; the SE0
 * thread management mask 0x0f selects the lower CU group.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1469
/*
 * (register, value) pairs for the second SGPR init dispatch; the SE0
 * thread management mask 0xf0 selects the upper CU group.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1490
/*
 * EDC SEC/DED error counter registers, read back after the init shaders
 * run to clear any counts accumulated by the workaround.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1519
/*
 * gfx_v8_0_do_edc_gpr_workarounds - prime GPR files for EDC (Carrizo only).
 *
 * Builds one indirect buffer containing three compute dispatches (the VGPR
 * init shader, then the SGPR init shader twice with complementary SE0 CU
 * masks 0x0f/0xf0), submits it on compute ring 0 and waits for completion,
 * then enables DED/FED reporting in GB_EDC_MODE and clears the SEC/DED
 * counters by reading them back.
 *
 * Returns 0 on success (or when skipped), a negative error code on IB
 * allocation, submission, or fence-wait failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->sched.ready)
		return 0;

	/* disable EDC while the init shaders run; re-enabled below */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/*
	 * Per dispatch: 3 dwords per SET_SH_REG pair, 4 for PGM_LO/HI,
	 * 5 for DISPATCH_DIRECT, 2 for the EVENT_WRITE flush; x4 for bytes.
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size,
					AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders into the tail of the same IB */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 - same shader as SGPR1, different CU mask */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED halt and FED propagation */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	/* NOTE(review): clears DIS_EDC then forces bit 0 set via "| 1" —
	 * presumably an intentional enable; confirm against register spec. */
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1683
/*
 * gfx_v8_0_gpu_early_init - fill in the per-ASIC gfx configuration.
 *
 * Sets shader engine/CU/backend counts and FIFO sizes per asic_type (for
 * Polaris parts these come from the atombios tables instead), derives
 * bank/rank counts and memory row size from MC_ARB_RAMCFG (or the DIMM
 * fuse registers on APUs), and fixes up ROW_SIZE in the golden
 * GB_ADDR_CONFIG value.  Returns 0, or the atombios query error.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* engine/CU counts come from the vbios on Polaris */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
	case CHIP_VEGAM:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFBANK);
	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFRANKS);

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: row size = 4KB * 2^NOOFCOLS, capped at 4KB */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1891
1892 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1893                                         int mec, int pipe, int queue)
1894 {
1895         int r;
1896         unsigned irq_type;
1897         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1898         unsigned int hw_prio;
1899
1900         ring = &adev->gfx.compute_ring[ring_id];
1901
1902         /* mec0 is me1 */
1903         ring->me = mec + 1;
1904         ring->pipe = pipe;
1905         ring->queue = queue;
1906
1907         ring->ring_obj = NULL;
1908         ring->use_doorbell = true;
1909         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1910         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1911                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1912         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1913
1914         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1915                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1916                 + ring->pipe;
1917
1918         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ?
1919                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
1920         /* type-2 packets are deprecated on MEC, use type-3 instead */
1921         r = amdgpu_ring_init(adev, ring, 1024,
1922                              &adev->gfx.eop_irq, irq_type, hw_prio);
1923         if (r)
1924                 return r;
1925
1926
1927         return 0;
1928 }
1929
1930 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1931
1932 static int gfx_v8_0_sw_init(void *handle)
1933 {
1934         int i, j, k, r, ring_id;
1935         struct amdgpu_ring *ring;
1936         struct amdgpu_kiq *kiq;
1937         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1938
1939         switch (adev->asic_type) {
1940         case CHIP_TONGA:
1941         case CHIP_CARRIZO:
1942         case CHIP_FIJI:
1943         case CHIP_POLARIS10:
1944         case CHIP_POLARIS11:
1945         case CHIP_POLARIS12:
1946         case CHIP_VEGAM:
1947                 adev->gfx.mec.num_mec = 2;
1948                 break;
1949         case CHIP_TOPAZ:
1950         case CHIP_STONEY:
1951         default:
1952                 adev->gfx.mec.num_mec = 1;
1953                 break;
1954         }
1955
1956         adev->gfx.mec.num_pipe_per_mec = 4;
1957         adev->gfx.mec.num_queue_per_pipe = 8;
1958
1959         /* EOP Event */
1960         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1961         if (r)
1962                 return r;
1963
1964         /* Privileged reg */
1965         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1966                               &adev->gfx.priv_reg_irq);
1967         if (r)
1968                 return r;
1969
1970         /* Privileged inst */
1971         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1972                               &adev->gfx.priv_inst_irq);
1973         if (r)
1974                 return r;
1975
1976         /* Add CP EDC/ECC irq  */
1977         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1978                               &adev->gfx.cp_ecc_error_irq);
1979         if (r)
1980                 return r;
1981
1982         /* SQ interrupts. */
1983         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1984                               &adev->gfx.sq_irq);
1985         if (r) {
1986                 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1987                 return r;
1988         }
1989
1990         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1991
1992         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1993
1994         gfx_v8_0_scratch_init(adev);
1995
1996         r = gfx_v8_0_init_microcode(adev);
1997         if (r) {
1998                 DRM_ERROR("Failed to load gfx firmware!\n");
1999                 return r;
2000         }
2001
2002         r = adev->gfx.rlc.funcs->init(adev);
2003         if (r) {
2004                 DRM_ERROR("Failed to init rlc BOs!\n");
2005                 return r;
2006         }
2007
2008         r = gfx_v8_0_mec_init(adev);
2009         if (r) {
2010                 DRM_ERROR("Failed to init MEC BOs!\n");
2011                 return r;
2012         }
2013
2014         /* set up the gfx ring */
2015         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2016                 ring = &adev->gfx.gfx_ring[i];
2017                 ring->ring_obj = NULL;
2018                 sprintf(ring->name, "gfx");
2019                 /* no gfx doorbells on iceland */
2020                 if (adev->asic_type != CHIP_TOPAZ) {
2021                         ring->use_doorbell = true;
2022                         ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2023                 }
2024
2025                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2026                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2027                                      AMDGPU_RING_PRIO_DEFAULT);
2028                 if (r)
2029                         return r;
2030         }
2031
2032
2033         /* set up the compute queues - allocate horizontally across pipes */
2034         ring_id = 0;
2035         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2036                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2037                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2038                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2039                                         continue;
2040
2041                                 r = gfx_v8_0_compute_ring_init(adev,
2042                                                                 ring_id,
2043                                                                 i, k, j);
2044                                 if (r)
2045                                         return r;
2046
2047                                 ring_id++;
2048                         }
2049                 }
2050         }
2051
2052         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2053         if (r) {
2054                 DRM_ERROR("Failed to init KIQ BOs!\n");
2055                 return r;
2056         }
2057
2058         kiq = &adev->gfx.kiq;
2059         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2060         if (r)
2061                 return r;
2062
2063         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2064         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2065         if (r)
2066                 return r;
2067
2068         adev->gfx.ce_ram_size = 0x8000;
2069
2070         r = gfx_v8_0_gpu_early_init(adev);
2071         if (r)
2072                 return r;
2073
2074         return 0;
2075 }
2076
/**
 * gfx_v8_0_sw_fini - software-side teardown of the GFX v8 IP block
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Releases everything the sw_init path allocated, in roughly reverse
 * order of creation: ring objects first, then the compute MQD backing
 * store, the KIQ ring and its bookkeeping, the MEC and RLC buffer
 * objects, and finally the loaded microcode images.
 *
 * Returns 0 (this teardown path cannot fail).
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	/* Tear down all GFX rings, then all compute rings. */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	/* MQDs were created for all compute queues (and KIQ for SRIOV)
	 * in sw_init; free them before dismantling the KIQ itself.
	 */
	amdgpu_gfx_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	amdgpu_gfx_rlc_fini(adev);
	/* The clear-state BO is allocated by the RLC init path but is not
	 * released by amdgpu_gfx_rlc_fini(), so free it explicitly here.
	 */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/* The CP table BO presumably exists only on Carrizo/Stoney
	 * (allocated elsewhere in this file for those ASICs) — matching
	 * the conditional free here. NOTE(review): confirm against the
	 * corresponding allocation site.
	 */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2106
2107 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2108 {
2109         uint32_t *modearray, *mod2array;
2110         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2111         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2112         u32 reg_offset;
2113
2114         modearray = adev->gfx.config.tile_mode_array;
2115         mod2array = adev->gfx.config.macrotile_mode_array;
2116
2117         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2118                 modearray[reg_offset] = 0;
2119
2120         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2121                 mod2array[reg_offset] = 0;
2122
2123         switch (adev->asic_type) {
2124         case CHIP_TOPAZ:
2125                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2126                                 PIPE_CONFIG(ADDR_SURF_P2) |
2127                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2128                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2129                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2130                                 PIPE_CONFIG(ADDR_SURF_P2) |
2131                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2132                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2133                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2134                                 PIPE_CONFIG(ADDR_SURF_P2) |
2135                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2136                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2137                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2138                                 PIPE_CONFIG(ADDR_SURF_P2) |
2139                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2140                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2141                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2142                                 PIPE_CONFIG(ADDR_SURF_P2) |
2143                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2144                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2145                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2146                                 PIPE_CONFIG(ADDR_SURF_P2) |
2147                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2148                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2149                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2150                                 PIPE_CONFIG(ADDR_SURF_P2) |
2151                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2152                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2153                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2154                                 PIPE_CONFIG(ADDR_SURF_P2));
2155                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2156                                 PIPE_CONFIG(ADDR_SURF_P2) |
2157                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2158                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2159                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160                                  PIPE_CONFIG(ADDR_SURF_P2) |
2161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2163                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2164                                  PIPE_CONFIG(ADDR_SURF_P2) |
2165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2167                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2168                                  PIPE_CONFIG(ADDR_SURF_P2) |
2169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2171                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2172                                  PIPE_CONFIG(ADDR_SURF_P2) |
2173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2175                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2176                                  PIPE_CONFIG(ADDR_SURF_P2) |
2177                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2178                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2179                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2180                                  PIPE_CONFIG(ADDR_SURF_P2) |
2181                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2182                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2183                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2184                                  PIPE_CONFIG(ADDR_SURF_P2) |
2185                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2186                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2187                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2188                                  PIPE_CONFIG(ADDR_SURF_P2) |
2189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2191                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2192                                  PIPE_CONFIG(ADDR_SURF_P2) |
2193                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2194                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2195                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2196                                  PIPE_CONFIG(ADDR_SURF_P2) |
2197                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2198                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2199                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2200                                  PIPE_CONFIG(ADDR_SURF_P2) |
2201                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2202                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2203                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2204                                  PIPE_CONFIG(ADDR_SURF_P2) |
2205                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2206                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2207                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2208                                  PIPE_CONFIG(ADDR_SURF_P2) |
2209                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2210                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2211                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2212                                  PIPE_CONFIG(ADDR_SURF_P2) |
2213                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2214                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2215                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2216                                  PIPE_CONFIG(ADDR_SURF_P2) |
2217                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2218                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2219                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2220                                  PIPE_CONFIG(ADDR_SURF_P2) |
2221                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2222                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2223                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2224                                  PIPE_CONFIG(ADDR_SURF_P2) |
2225                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2226                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2227
2228                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2229                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2230                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2231                                 NUM_BANKS(ADDR_SURF_8_BANK));
2232                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2233                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2234                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2235                                 NUM_BANKS(ADDR_SURF_8_BANK));
2236                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2237                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2238                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2239                                 NUM_BANKS(ADDR_SURF_8_BANK));
2240                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2241                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2242                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2243                                 NUM_BANKS(ADDR_SURF_8_BANK));
2244                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2245                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2246                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2247                                 NUM_BANKS(ADDR_SURF_8_BANK));
2248                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2249                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2250                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2251                                 NUM_BANKS(ADDR_SURF_8_BANK));
2252                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2253                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2254                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2255                                 NUM_BANKS(ADDR_SURF_8_BANK));
2256                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2257                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2258                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2259                                 NUM_BANKS(ADDR_SURF_16_BANK));
2260                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2261                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2262                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2263                                 NUM_BANKS(ADDR_SURF_16_BANK));
2264                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2265                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2266                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2267                                  NUM_BANKS(ADDR_SURF_16_BANK));
2268                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2269                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2270                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2271                                  NUM_BANKS(ADDR_SURF_16_BANK));
2272                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2274                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2275                                  NUM_BANKS(ADDR_SURF_16_BANK));
2276                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2277                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2278                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2279                                  NUM_BANKS(ADDR_SURF_16_BANK));
2280                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2281                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2282                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2283                                  NUM_BANKS(ADDR_SURF_8_BANK));
2284
2285                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2286                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2287                             reg_offset != 23)
2288                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2289
2290                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2291                         if (reg_offset != 7)
2292                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2293
2294                 break;
2295         case CHIP_FIJI:
2296         case CHIP_VEGAM:
2297                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2298                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2299                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2300                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2301                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2303                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2304                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2305                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2308                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2309                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2310                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2312                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2313                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2316                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2317                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2318                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2320                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2321                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2322                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2324                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2325                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2326                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2327                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2328                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2329                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2330                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2331                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2332                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2334                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2335                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2339                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2340                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2343                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2344                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2347                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2348                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2351                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2352                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2355                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2356                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2360                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2363                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2364                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2367                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2368                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2371                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2372                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2375                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2376                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2378                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2379                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2380                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2382                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2383                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2384                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2386                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2387                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2388                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2389                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2390                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2391                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2392                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2394                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2395                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2396                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2398                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2399                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2400                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2402                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2403                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2406                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2407                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2408                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2410                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2411                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2412                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2414                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2415                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2416                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2417                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2418                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2419
2420                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2422                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2423                                 NUM_BANKS(ADDR_SURF_8_BANK));
2424                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2426                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2427                                 NUM_BANKS(ADDR_SURF_8_BANK));
2428                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2430                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2431                                 NUM_BANKS(ADDR_SURF_8_BANK));
2432                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2433                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2434                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2435                                 NUM_BANKS(ADDR_SURF_8_BANK));
2436                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2438                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2439                                 NUM_BANKS(ADDR_SURF_8_BANK));
2440                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2442                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2443                                 NUM_BANKS(ADDR_SURF_8_BANK));
2444                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2446                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447                                 NUM_BANKS(ADDR_SURF_8_BANK));
2448                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2450                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2451                                 NUM_BANKS(ADDR_SURF_8_BANK));
2452                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2454                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2455                                 NUM_BANKS(ADDR_SURF_8_BANK));
2456                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2458                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459                                  NUM_BANKS(ADDR_SURF_8_BANK));
2460                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463                                  NUM_BANKS(ADDR_SURF_8_BANK));
2464                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2466                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467                                  NUM_BANKS(ADDR_SURF_8_BANK));
2468                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471                                  NUM_BANKS(ADDR_SURF_8_BANK));
2472                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475                                  NUM_BANKS(ADDR_SURF_4_BANK));
2476
2477                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2478                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2479
2480                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2481                         if (reg_offset != 7)
2482                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2483
2484                 break;
2485         case CHIP_TONGA:
2486                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2487                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2488                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2489                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2490                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2491                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2492                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2493                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2494                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2496                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2497                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2498                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2500                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2501                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2502                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2503                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2504                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2505                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2506                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2507                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2508                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2509                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2510                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2511                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2513                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2514                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2515                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2516                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2517                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2518                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2519                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2520                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2523                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2527                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2528                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2529                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2531                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2532                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2534                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2535                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2536                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2537                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2539                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2543                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2544                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2545                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2547                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2549                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2551                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2552                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2553                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2554                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2555                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2556                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2557                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2559                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2560                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2561                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2563                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2564                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2565                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2567                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2568                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2569                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2570                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2571                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2572                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2573                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2575                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2576                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2577                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2578                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2579                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2580                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2581                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2583                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2584                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2585                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2587                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2588                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2589                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2591                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2592                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2593                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2594                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2595                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2596                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2597                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2599                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2600                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2601                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2602                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2603                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2604                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2605                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2606                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2607                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2608
2609                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2611                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2612                                 NUM_BANKS(ADDR_SURF_16_BANK));
2613                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2615                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2616                                 NUM_BANKS(ADDR_SURF_16_BANK));
2617                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2618                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2619                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2620                                 NUM_BANKS(ADDR_SURF_16_BANK));
2621                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2622                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2623                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2624                                 NUM_BANKS(ADDR_SURF_16_BANK));
2625                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2626                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2627                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2628                                 NUM_BANKS(ADDR_SURF_16_BANK));
2629                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2631                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2632                                 NUM_BANKS(ADDR_SURF_16_BANK));
2633                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2635                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2636                                 NUM_BANKS(ADDR_SURF_16_BANK));
2637                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2639                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2640                                 NUM_BANKS(ADDR_SURF_16_BANK));
2641                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2642                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2643                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2644                                 NUM_BANKS(ADDR_SURF_16_BANK));
2645                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2647                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2648                                  NUM_BANKS(ADDR_SURF_16_BANK));
2649                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2651                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2652                                  NUM_BANKS(ADDR_SURF_16_BANK));
2653                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2654                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2655                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2656                                  NUM_BANKS(ADDR_SURF_8_BANK));
2657                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2658                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2659                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2660                                  NUM_BANKS(ADDR_SURF_4_BANK));
2661                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2662                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2663                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2664                                  NUM_BANKS(ADDR_SURF_4_BANK));
2665
2666                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2667                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2668
2669                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2670                         if (reg_offset != 7)
2671                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2672
2673                 break;
2674         case CHIP_POLARIS11:
2675         case CHIP_POLARIS12:
2676                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2678                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2679                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2680                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2682                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2683                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2684                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2686                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2687                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2688                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2690                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2691                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2692                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2695                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2696                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2697                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2699                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2700                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2701                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2703                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2704                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2705                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2707                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2708                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2709                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2710                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2711                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2713                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2715                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2717                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2719                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2721                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2722                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2725                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2726                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2727                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2730                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2731                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2733                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2734                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2735                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2737                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2738                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2739                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2741                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2742                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2743                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2744                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2745                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2746                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2747                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2748                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2749                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2750                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2751                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2753                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2754                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2755                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2756                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2757                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2758                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2759                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2760                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2761                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2762                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2763                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2765                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2766                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2767                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2768                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2769                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2770                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2771                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2773                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2774                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2775                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2777                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2778                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2779                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2780                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2781                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2782                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2783                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2785                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2786                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2789                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2790                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2791                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2792                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2793                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2794                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2795                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2796                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2797                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2798
2799                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2800                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2801                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2802                                 NUM_BANKS(ADDR_SURF_16_BANK));
2803
2804                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2806                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2807                                 NUM_BANKS(ADDR_SURF_16_BANK));
2808
2809                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2810                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2811                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2812                                 NUM_BANKS(ADDR_SURF_16_BANK));
2813
2814                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2815                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2816                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2817                                 NUM_BANKS(ADDR_SURF_16_BANK));
2818
2819                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2820                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2821                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2822                                 NUM_BANKS(ADDR_SURF_16_BANK));
2823
2824                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2826                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2827                                 NUM_BANKS(ADDR_SURF_16_BANK));
2828
2829                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832                                 NUM_BANKS(ADDR_SURF_16_BANK));
2833
2834                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2835                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2836                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2837                                 NUM_BANKS(ADDR_SURF_16_BANK));
2838
2839                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2840                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2841                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2842                                 NUM_BANKS(ADDR_SURF_16_BANK));
2843
2844                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2846                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847                                 NUM_BANKS(ADDR_SURF_16_BANK));
2848
2849                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2851                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852                                 NUM_BANKS(ADDR_SURF_16_BANK));
2853
2854                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2855                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2856                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2857                                 NUM_BANKS(ADDR_SURF_16_BANK));
2858
2859                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2860                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2861                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2862                                 NUM_BANKS(ADDR_SURF_8_BANK));
2863
2864                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2866                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2867                                 NUM_BANKS(ADDR_SURF_4_BANK));
2868
2869                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2870                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2871
2872                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2873                         if (reg_offset != 7)
2874                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2875
2876                 break;
2877         case CHIP_POLARIS10:
2878                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2879                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2880                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2882                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2883                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2884                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2885                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2886                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2887                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2888                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2889                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2890                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2891                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2892                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2893                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2894                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2895                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2896                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2897                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2898                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2899                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2900                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2901                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2902                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2903                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2904                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2905                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2906                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2907                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2908                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2909                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2910                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2911                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2912                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2913                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2915                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2920                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2924                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2928                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2929                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2930                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2931                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2932                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2934                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2935                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2937                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2939                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2941                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2943                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2944                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2945                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2946                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2947                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2948                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2949                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2950                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2951                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2952                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2953                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2954                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2955                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2956                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2957                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2958                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2959                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2960                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2961                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2962                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2963                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2964                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2965                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2966                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2967                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2968                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2969                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2970                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2971                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2972                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2973                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2974                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2975                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2976                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2977                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2978                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2979                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2980                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2981                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2982                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2983                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2984                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2985                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2986                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2987                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2988                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2989                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2990                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2991                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2992                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2993                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2994                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2995                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2996                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2997                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2998                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2999                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3000
3001                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3002                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3003                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3004                                 NUM_BANKS(ADDR_SURF_16_BANK));
3005
3006                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3007                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3008                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3009                                 NUM_BANKS(ADDR_SURF_16_BANK));
3010
3011                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3012                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3013                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014                                 NUM_BANKS(ADDR_SURF_16_BANK));
3015
3016                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3017                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3018                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3019                                 NUM_BANKS(ADDR_SURF_16_BANK));
3020
3021                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3023                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3024                                 NUM_BANKS(ADDR_SURF_16_BANK));
3025
3026                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3028                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3029                                 NUM_BANKS(ADDR_SURF_16_BANK));
3030
3031                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3033                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3034                                 NUM_BANKS(ADDR_SURF_16_BANK));
3035
3036                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3038                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3039                                 NUM_BANKS(ADDR_SURF_16_BANK));
3040
3041                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3043                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3044                                 NUM_BANKS(ADDR_SURF_16_BANK));
3045
3046                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3048                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3049                                 NUM_BANKS(ADDR_SURF_16_BANK));
3050
3051                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3053                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3054                                 NUM_BANKS(ADDR_SURF_16_BANK));
3055
3056                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3058                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3059                                 NUM_BANKS(ADDR_SURF_8_BANK));
3060
3061                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3062                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3063                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3064                                 NUM_BANKS(ADDR_SURF_4_BANK));
3065
3066                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3067                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3068                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3069                                 NUM_BANKS(ADDR_SURF_4_BANK));
3070
3071                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3072                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3073
3074                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3075                         if (reg_offset != 7)
3076                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3077
3078                 break;
3079         case CHIP_STONEY:
                     /*
                      * Stoney tiling tables: every entry below uses the
                      * 2-pipe (ADDR_SURF_P2) pipe configuration.  Note that
                      * modearray indices 7, 12, 17 and 23 are never assigned
                      * here; they are correspondingly skipped when the
                      * GB_TILE_MODE registers are written further down.
                      */
3080                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3081                                 PIPE_CONFIG(ADDR_SURF_P2) |
3082                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3083                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3084                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3085                                 PIPE_CONFIG(ADDR_SURF_P2) |
3086                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3087                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3088                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3089                                 PIPE_CONFIG(ADDR_SURF_P2) |
3090                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3091                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3092                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3093                                 PIPE_CONFIG(ADDR_SURF_P2) |
3094                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3095                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3096                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3097                                 PIPE_CONFIG(ADDR_SURF_P2) |
3098                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3099                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3100                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3101                                 PIPE_CONFIG(ADDR_SURF_P2) |
3102                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3103                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3104                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3105                                 PIPE_CONFIG(ADDR_SURF_P2) |
3106                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3107                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3108                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3109                                 PIPE_CONFIG(ADDR_SURF_P2));
3110                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3111                                 PIPE_CONFIG(ADDR_SURF_P2) |
3112                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3113                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3114                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115                                  PIPE_CONFIG(ADDR_SURF_P2) |
3116                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3117                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3118                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3119                                  PIPE_CONFIG(ADDR_SURF_P2) |
3120                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3121                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3122                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3123                                  PIPE_CONFIG(ADDR_SURF_P2) |
3124                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3125                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3126                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3127                                  PIPE_CONFIG(ADDR_SURF_P2) |
3128                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3129                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3130                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3131                                  PIPE_CONFIG(ADDR_SURF_P2) |
3132                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3133                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3134                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3135                                  PIPE_CONFIG(ADDR_SURF_P2) |
3136                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3137                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3138                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3139                                  PIPE_CONFIG(ADDR_SURF_P2) |
3140                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3141                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3142                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3143                                  PIPE_CONFIG(ADDR_SURF_P2) |
3144                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3145                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3146                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3147                                  PIPE_CONFIG(ADDR_SURF_P2) |
3148                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3149                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3150                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3151                                  PIPE_CONFIG(ADDR_SURF_P2) |
3152                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3153                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3154                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3155                                  PIPE_CONFIG(ADDR_SURF_P2) |
3156                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3157                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3158                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3159                                  PIPE_CONFIG(ADDR_SURF_P2) |
3160                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3161                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3162                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3163                                  PIPE_CONFIG(ADDR_SURF_P2) |
3164                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3165                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3166                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3167                                  PIPE_CONFIG(ADDR_SURF_P2) |
3168                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3169                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3170                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3171                                  PIPE_CONFIG(ADDR_SURF_P2) |
3172                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3173                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3174                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3175                                  PIPE_CONFIG(ADDR_SURF_P2) |
3176                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3177                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3178                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3179                                  PIPE_CONFIG(ADDR_SURF_P2) |
3180                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3181                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3182
                     /*
                      * Macro-tile (bank geometry) parameters for Stoney.
                      * Index 7 is never assigned and is skipped in the
                      * GB_MACROTILE_MODE write loop below.
                      */
3183                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3185                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3186                                 NUM_BANKS(ADDR_SURF_8_BANK));
3187                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3188                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3189                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3190                                 NUM_BANKS(ADDR_SURF_8_BANK));
3191                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3192                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3193                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3194                                 NUM_BANKS(ADDR_SURF_8_BANK));
3195                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3196                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3197                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3198                                 NUM_BANKS(ADDR_SURF_8_BANK));
3199                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3200                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3201                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3202                                 NUM_BANKS(ADDR_SURF_8_BANK));
3203                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3204                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3205                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3206                                 NUM_BANKS(ADDR_SURF_8_BANK));
3207                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3208                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3209                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3210                                 NUM_BANKS(ADDR_SURF_8_BANK));
3211                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3212                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3213                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3214                                 NUM_BANKS(ADDR_SURF_16_BANK));
3215                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3216                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3217                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3218                                 NUM_BANKS(ADDR_SURF_16_BANK));
3219                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3220                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3221                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3222                                  NUM_BANKS(ADDR_SURF_16_BANK));
3223                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3224                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3225                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3226                                  NUM_BANKS(ADDR_SURF_16_BANK));
3227                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3228                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3229                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3230                                  NUM_BANKS(ADDR_SURF_16_BANK));
3231                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3232                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3233                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3234                                  NUM_BANKS(ADDR_SURF_16_BANK));
3235                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3236                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3237                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3238                                  NUM_BANKS(ADDR_SURF_8_BANK));
3239
                     /*
                      * Program GB_TILE_MODE0..n, skipping entries 7, 12, 17
                      * and 23, which were left unset in modearray[] above.
                      */
3240                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3241                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3242                             reg_offset != 23)
3243                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3244
                     /*
                      * Program GB_MACROTILE_MODE0..n, skipping entry 7,
                      * which was left unset in mod2array[] above.
                      */
3245                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3246                         if (reg_offset != 7)
3247                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3248
3249                 break;
3250         default:
                     /*
                      * Unrecognized VI-family ASIC: warn once, then reuse the
                      * CHIP_CARRIZO tables via the explicit fallthrough below.
                      */
3251                 dev_warn(adev->dev,
3252                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3253                          adev->asic_type);
3254                 fallthrough;
3255
3256         case CHIP_CARRIZO:
3257                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3258                                 PIPE_CONFIG(ADDR_SURF_P2) |
3259                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3260                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3261                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3262                                 PIPE_CONFIG(ADDR_SURF_P2) |
3263                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3264                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3265                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3266                                 PIPE_CONFIG(ADDR_SURF_P2) |
3267                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3269                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3270                                 PIPE_CONFIG(ADDR_SURF_P2) |
3271                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3272                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3273                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3274                                 PIPE_CONFIG(ADDR_SURF_P2) |
3275                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3276                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3277                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3278                                 PIPE_CONFIG(ADDR_SURF_P2) |
3279                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3280                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3281                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3282                                 PIPE_CONFIG(ADDR_SURF_P2) |
3283                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3284                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3285                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3286                                 PIPE_CONFIG(ADDR_SURF_P2));
3287                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3288                                 PIPE_CONFIG(ADDR_SURF_P2) |
3289                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3290                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3291                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292                                  PIPE_CONFIG(ADDR_SURF_P2) |
3293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3295                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3296                                  PIPE_CONFIG(ADDR_SURF_P2) |
3297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3299                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3300                                  PIPE_CONFIG(ADDR_SURF_P2) |
3301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3303                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3304                                  PIPE_CONFIG(ADDR_SURF_P2) |
3305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3307                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3308                                  PIPE_CONFIG(ADDR_SURF_P2) |
3309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3311                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3312                                  PIPE_CONFIG(ADDR_SURF_P2) |
3313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3315                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3316                                  PIPE_CONFIG(ADDR_SURF_P2) |
3317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3319                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3320                                  PIPE_CONFIG(ADDR_SURF_P2) |
3321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3323                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3324                                  PIPE_CONFIG(ADDR_SURF_P2) |
3325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3327                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3328                                  PIPE_CONFIG(ADDR_SURF_P2) |
3329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3331                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3332                                  PIPE_CONFIG(ADDR_SURF_P2) |
3333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3335                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3336                                  PIPE_CONFIG(ADDR_SURF_P2) |
3337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3339                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3340                                  PIPE_CONFIG(ADDR_SURF_P2) |
3341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3343                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3344                                  PIPE_CONFIG(ADDR_SURF_P2) |
3345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3348                                  PIPE_CONFIG(ADDR_SURF_P2) |
3349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3351                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3352                                  PIPE_CONFIG(ADDR_SURF_P2) |
3353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3355                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3356                                  PIPE_CONFIG(ADDR_SURF_P2) |
3357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3359
3360                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3361                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3362                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3363                                 NUM_BANKS(ADDR_SURF_8_BANK));
3364                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3365                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3366                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367                                 NUM_BANKS(ADDR_SURF_8_BANK));
3368                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3370                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3371                                 NUM_BANKS(ADDR_SURF_8_BANK));
3372                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3373                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3374                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3375                                 NUM_BANKS(ADDR_SURF_8_BANK));
3376                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3379                                 NUM_BANKS(ADDR_SURF_8_BANK));
3380                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3381                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3382                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3383                                 NUM_BANKS(ADDR_SURF_8_BANK));
3384                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3386                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3387                                 NUM_BANKS(ADDR_SURF_8_BANK));
3388                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3389                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3390                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3391                                 NUM_BANKS(ADDR_SURF_16_BANK));
3392                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3393                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3394                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3395                                 NUM_BANKS(ADDR_SURF_16_BANK));
3396                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3397                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3398                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3399                                  NUM_BANKS(ADDR_SURF_16_BANK));
3400                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3401                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3402                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3403                                  NUM_BANKS(ADDR_SURF_16_BANK));
3404                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3405                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3406                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3407                                  NUM_BANKS(ADDR_SURF_16_BANK));
3408                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3409                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3410                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3411                                  NUM_BANKS(ADDR_SURF_16_BANK));
3412                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3413                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3414                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3415                                  NUM_BANKS(ADDR_SURF_8_BANK));
3416
3417                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3418                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3419                             reg_offset != 23)
3420                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3421
3422                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3423                         if (reg_offset != 7)
3424                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3425
3426                 break;
3427         }
3428 }
3429
3430 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3431                                   u32 se_num, u32 sh_num, u32 instance)
3432 {
3433         u32 data;
3434
3435         if (instance == 0xffffffff)
3436                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3437         else
3438                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3439
3440         if (se_num == 0xffffffff)
3441                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3442         else
3443                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3444
3445         if (sh_num == 0xffffffff)
3446                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3447         else
3448                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3449
3450         WREG32(mmGRBM_GFX_INDEX, data);
3451 }
3452
/* Route register access to a specific ME/pipe/queue/VM via the SRBM index. */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q, u32 vm)
{
	vi_srbm_select(adev, me, pipe, q, vm);
}
3458
3459 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3460 {
3461         u32 data, mask;
3462
3463         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3464                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3465
3466         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3467
3468         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3469                                          adev->gfx.config.max_sh_per_se);
3470
3471         return (~data) & mask;
3472 }
3473
/*
 * gfx_v8_0_raster_config - per-ASIC PA_SC_RASTER_CONFIG[_1] defaults
 *
 * @rconf/@rconf1: OR'ed with the family's ideal raster configuration.
 *
 * The values describe how render backends, packers and shader engines
 * map onto screen tiles; they are fixed constants per ASIC family and
 * may later be corrected for harvesting (see
 * gfx_v8_0_write_harvested_raster_configs()).
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_VEGAM:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		/* keep the hardware default mapping */
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3514
/*
 * gfx_v8_0_write_harvested_raster_configs - program raster config around
 * harvested render backends
 *
 * @raster_config/@raster_config_1: ideal (unharvested) config values
 * @rb_mask: bitmap of render backends that are actually enabled
 * @num_rb: number of RBs the ideal config was computed for
 *
 * When some RBs are fused off, the static per-ASIC raster config could
 * route work to dead backends.  Walk each shader engine and rewrite the
 * SE-pair/SE/PKR/RB map fields so only live RBs are referenced, then
 * restore GRBM_GFX_INDEX to broadcast mode.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Per-SE slice of the global RB mask. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If a whole SE pair is dead, remap the SE-pair field to the live one. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* If one SE of the pair is dead, point SE_MAP at the live one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea one level down: remap a dead packer. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* And finally remap dead RBs inside each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3623
/*
 * gfx_v8_0_setup_rb - detect active render backends and program raster config
 *
 * Walk every SE/SH, read back which RBs survived harvesting, cache the
 * result for userspace queries, and program PA_SC_RASTER_CONFIG[_1] with
 * either the ideal per-ASIC values or a harvest-corrected variant.
 * Caller context: uses adev->grbm_idx_mutex to serialize GRBM indexing.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Gather the per-SH active-RB bitmaps into one global bitmap. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* Only rewrite the config for harvesting when some RBs are disabled. */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3680
/**
 * gfx_v8_0_init_compute_vmid - init the SH_MEM registers for the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Program the SH_MEM aperture registers for every KFD-owned VMID and
 * clear their GDS/GWS/OA allocations.
 *
 */
#define DEFAULT_SH_MEM_BASES    (0x6000)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
	}
}
3732
3733 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3734 {
3735         int vmid;
3736
3737         /*
3738          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3739          * access. Compute VMIDs should be enabled by FW for target VMIDs,
3740          * the driver can enable them for graphics. VMID0 should maintain
3741          * access so that HWS firmware can save/restore entries.
3742          */
3743         for (vmid = 1; vmid < 16; vmid++) {
3744                 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3745                 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3746                 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3747                 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3748         }
3749 }
3750
3751 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3752 {
3753         switch (adev->asic_type) {
3754         default:
3755                 adev->gfx.config.double_offchip_lds_buf = 1;
3756                 break;
3757         case CHIP_CARRIZO:
3758         case CHIP_STONEY:
3759                 adev->gfx.config.double_offchip_lds_buf = 0;
3760                 break;
3761         }
3762 }
3763
/*
 * gfx_v8_0_constants_init - one-time golden-register setup for the GFX block
 *
 * Programs address config, tiling tables, RB/CU harvest state, per-VMID
 * SH_MEM apertures and broadcast PA_SC/SPI defaults.  Called from hw init.
 */
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel): uncached default mtype, zero bases */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* user VMIDs: non-coherent default, shared aperture base */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);
	gfx_v8_0_init_gds_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3846
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes masters to go idle
 *
 * Polls the per-CU master-busy register for every SE/SH (up to
 * adev->usec_timeout microseconds each), then the non-CU master-busy
 * register.  Logs and bails out early on timeout; best-effort only.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* restore broadcast and drop the lock before
				 * returning so callers see a sane state */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3884
3885 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3886                                                bool enable)
3887 {
3888         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3889
3890         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3891         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3892         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3893         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3894
3895         WREG32(mmCP_INT_CNTL_RING0, tmp);
3896 }
3897
3898 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3899 {
3900         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3901         /* csib */
3902         WREG32(mmRLC_CSIB_ADDR_HI,
3903                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3904         WREG32(mmRLC_CSIB_ADDR_LO,
3905                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3906         WREG32(mmRLC_CSIB_LENGTH,
3907                         adev->gfx.rlc.clear_state_size);
3908 }
3909
3910 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3911                                 int ind_offset,
3912                                 int list_size,
3913                                 int *unique_indices,
3914                                 int *indices_count,
3915                                 int max_indices,
3916                                 int *ind_start_offsets,
3917                                 int *offset_count,
3918                                 int max_offset)
3919 {
3920         int indices;
3921         bool new_entry = true;
3922
3923         for (; ind_offset < list_size; ind_offset++) {
3924
3925                 if (new_entry) {
3926                         new_entry = false;
3927                         ind_start_offsets[*offset_count] = ind_offset;
3928                         *offset_count = *offset_count + 1;
3929                         BUG_ON(*offset_count >= max_offset);
3930                 }
3931
3932                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3933                         new_entry = true;
3934                         continue;
3935                 }
3936
3937                 ind_offset += 2;
3938
3939                 /* look for the matching indice */
3940                 for (indices = 0;
3941                         indices < *indices_count;
3942                         indices++) {
3943                         if (unique_indices[indices] ==
3944                                 register_list_format[ind_offset])
3945                                 break;
3946                 }
3947
3948                 if (indices >= *indices_count) {
3949                         unique_indices[*indices_count] =
3950                                 register_list_format[ind_offset];
3951                         indices = *indices_count;
3952                         *indices_count = *indices_count + 1;
3953                         BUG_ON(*indices_count >= max_indices);
3954                 }
3955
3956                 register_list_format[ind_offset] = indices;
3957         }
3958 }
3959
/*
 * gfx_v8_0_init_save_restore_list - upload the RLC save/restore lists
 *
 * Copies the firmware-provided register-restore list into RLC ARAM and
 * the (index-compressed) indirect list plus bookkeeping values into RLC
 * GPM scratch, then programs the unique index registers.
 *
 * Returns 0 on success or -ENOMEM if the scratch copy of the format
 * list cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parsing rewrites the list in place */
	unsigned int *register_list_format =
		kmemdup(adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size is stored in units of dword pairs */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	/* NOTE(review): the address mask (0x3FFFF, 18 bits) and the data
	 * shift (>> 20) leave bits 18-19 unused — presumably this matches
	 * the RLC_SRM_INDEX_CNTL register layout; confirm against the spec.
	 */
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4022
/* Turn on the RLC save/restore machine (SRM) via RLC_SRM_CNTL. */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4027
/*
 * Program the fixed power-gating timing parameters: CP WPTR poll idle
 * count, the four RLC_PG_DELAY sub-delays, the SERDES command delay and
 * the GRBM register-save idle threshold.  The values are fixed magic
 * numbers -- presumably from the VI programming guide; not derivable
 * from this file.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* all four PG delays get the same 0x10 value */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4044
/* Toggle SMU clock slow-down during power-up (CZ/ST power gating). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4050
/* Toggle SMU clock slow-down during power-down (CZ/ST power gating). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4056
/* Enable/disable CP power gating.  Note the register field is a
 * DISABLE bit, hence the inverted enable ? 0 : 1 mapping.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4061
4062 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4063 {
4064         if ((adev->asic_type == CHIP_CARRIZO) ||
4065             (adev->asic_type == CHIP_STONEY)) {
4066                 gfx_v8_0_init_csb(adev);
4067                 gfx_v8_0_init_save_restore_list(adev);
4068                 gfx_v8_0_enable_save_restore_machine(adev);
4069                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4070                 gfx_v8_0_init_power_gating(adev);
4071                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4072         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4073                    (adev->asic_type == CHIP_POLARIS12) ||
4074                    (adev->asic_type == CHIP_VEGAM)) {
4075                 gfx_v8_0_init_csb(adev);
4076                 gfx_v8_0_init_save_restore_list(adev);
4077                 gfx_v8_0_enable_save_restore_machine(adev);
4078                 gfx_v8_0_init_power_gating(adev);
4079         }
4080
4081 }
4082
/* Halt the RLC F32 core, mask the GUI-idle interrupt and wait for the
 * RLC serdes to drain before returning.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4090
/* Pulse the RLC soft reset: assert, wait 50us, deassert, wait 50us. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4099
/* Re-enable the RLC F32 core.  On dGPUs the GUI-idle interrupt is
 * unmasked here; APUs (carrizo) defer that until after CP init.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4110
/*
 * Bring the RLC back up.  Under SR-IOV the host owns the RLC, so the
 * guest only (re)programs the clear-state buffer.  Otherwise: stop,
 * soft-reset, re-init power gating, then restart via the rlc funcs
 * table.  Always returns 0.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev)) {
		gfx_v8_0_init_csb(adev);
		return 0;
	}

	adev->gfx.rlc.funcs->stop(adev);
	adev->gfx.rlc.funcs->reset(adev);
	gfx_v8_0_init_pg(adev);
	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
4125
4126 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4127 {
4128         u32 tmp = RREG32(mmCP_ME_CNTL);
4129
4130         if (enable) {
4131                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4132                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4133                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4134         } else {
4135                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4136                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4137                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4138         }
4139         WREG32(mmCP_ME_CNTL, tmp);
4140         udelay(50);
4141 }
4142
4143 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4144 {
4145         u32 count = 0;
4146         const struct cs_section_def *sect = NULL;
4147         const struct cs_extent_def *ext = NULL;
4148
4149         /* begin clear state */
4150         count += 2;
4151         /* context control state */
4152         count += 3;
4153
4154         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4155                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4156                         if (sect->id == SECT_CONTEXT)
4157                                 count += 2 + ext->reg_count;
4158                         else
4159                                 return 0;
4160                 }
4161         }
4162         /* pa_sc_raster_config/pa_sc_raster_config1 */
4163         count += 4;
4164         /* end clear state */
4165         count += 2;
4166         /* clear state */
4167         count += 2;
4168
4169         return count;
4170 }
4171
/*
 * Initialize the gfx CP and emit the clear-state preamble on the gfx
 * ring: PREAMBLE begin, CONTEXT_CONTROL, every SECT_CONTEXT register
 * extent from vi_cs_data, the raster config pair, PREAMBLE end,
 * CLEAR_STATE, and the CE partition bases.  Returns 0 on success or
 * the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* csb size + 4 dwords for the trailing SET_BASE packet */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every context-register extent from the clear-state table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* raster config from the SE0/RB0 harvest configuration */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/*
 * Configure the gfx ring doorbell: enable/disable it in
 * CP_RB_DOORBELL_CONTROL and, on dGPUs, program the doorbell
 * address range the CP will accept.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* APUs do not have the doorbell range registers below */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					adev->doorbell_index.gfx_ring0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4268
/*
 * Program and start the gfx ring buffer (RB0): size/block size, read
 * and write pointers, rptr/wptr write-back addresses, base address and
 * doorbell, then emit the clear-state preamble via
 * gfx_v8_0_cp_gfx_start().  The exact write order (CNTL with
 * RPTR_WR_ENA, pointers, then CNTL again after a 1ms settle) matters.
 * Always returns 0.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* let the pointer writes land before re-arming CNTL */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is a 256-byte aligned GPU address */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->sched.ready = true;

	return 0;
}
4322
/* Release or halt both compute micro engines (MEC ME1/ME2).  Halting
 * the MEC also takes the KIQ ring out of service.
 */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		adev->gfx.kiq.ring.sched.ready = false;
	}
	udelay(50);
}
4333
4334 /* KIQ functions */
/* Tell the RLC which me/pipe/queue is the KIQ by packing them into the
 * low byte of RLC_CP_SCHEDULERS, then set bit 7 in a second write
 * (presumably a valid/enable bit -- the two-write sequence is
 * deliberate; do not merge).
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4348
/*
 * Use the KIQ to bring all compute queues online: build a bitmask of
 * usable MEC queues, emit one SET_RESOURCES packet, then one
 * MAP_QUEUES packet per compute ring (MQD address + wptr write-back
 * address).  Returns 0 on success or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	/* 8 dwords for SET_RESOURCES + 8 per MAP_QUEUES */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	amdgpu_ring_commit(kiq_ring);

	return 0;
}
4409
/*
 * Deactivate the currently-selected HQD (caller must hold the SRBM
 * selection).  If the queue is active, issue a dequeue request of type
 * @req and poll up to adev->usec_timeout microseconds for it to go
 * inactive.  The dequeue request and PQ pointers are cleared regardless.
 * Returns 0 on success, -ETIMEDOUT if the queue never went inactive.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4430
4431 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4432 {
4433         struct amdgpu_device *adev = ring->adev;
4434
4435         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4436                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
4437                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4438                         mqd->cp_hqd_queue_priority =
4439                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4440                 }
4441         }
4442 }
4443
/*
 * Populate the MQD (memory queue descriptor) for a compute or KIQ
 * ring.  The MQD mirrors the CP_HQD_* register set; it is later either
 * written to hardware by gfx_v8_0_mqd_commit() or handed to the KIQ in
 * a MAP_QUEUES packet.  Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* all-ones static thread management masks for each SE */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* GPU address of the dynamic_cu_mask field inside this MQD allocation */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	/* EOP buffer base is a 256-byte aligned GPU address */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot current hardware values for the remaining fields */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* set static priority for a queue/ring */
	gfx_v8_0_mqd_set_priority(ring, mqd);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

	/* map_queues packet doesn't need activate the queue,
	 * so only kiq need set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}
4590
/*
 * Write an MQD image into the currently SRBM-selected HQD registers.
 * The three-pass register order (HQD body, EOP events..ERROR, then
 * MQD_BASE_ADDR..HQD_ACTIVE last) matters: writing CP_HQD_ACTIVE is
 * what arms the queue.  Always returns 0.
 */
static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4627
/*
 * Initialize (or restore after GPU reset) the KIQ's MQD and commit it
 * directly to the hardware under the SRBM lock.  On first init the MQD
 * is built from scratch and a backup copy is saved; on reset the
 * backup is replayed instead.  Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* KIQ backup slot lives after the compute-ring slots */
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a pristine copy for GPU-reset restore */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4666
/*
 * Initialize a user compute queue's MQD.  Unlike the KIQ, the MQD is
 * not committed to hardware here -- the KIQ maps it later via
 * MAP_QUEUES (see gfx_v8_0_kiq_kcq_enable()).  Fresh init builds and
 * backs up the MQD; GPU reset restores it from the backup; suspend/
 * resume only clears the ring buffer.  Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot index == position in the compute_ring array */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4697
/* Program the MEC doorbell aperture (KIQ..mec_ring7) and enable
 * doorbells.  The asic_type > CHIP_TONGA test relies on the chip enum
 * ordering to select post-Tonga ASICs -- TODO confirm that is the
 * intended set.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4707
4708 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4709 {
4710         struct amdgpu_ring *ring;
4711         int r;
4712
4713         ring = &adev->gfx.kiq.ring;
4714
4715         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4716         if (unlikely(r != 0))
4717                 return r;
4718
4719         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4720         if (unlikely(r != 0))
4721                 return r;
4722
4723         gfx_v8_0_kiq_init_queue(ring);
4724         amdgpu_bo_kunmap(ring->mqd_obj);
4725         ring->mqd_ptr = NULL;
4726         amdgpu_bo_unreserve(ring->mqd_obj);
4727         ring->sched.ready = true;
4728         return 0;
4729 }
4730
4731 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4732 {
4733         struct amdgpu_ring *ring = NULL;
4734         int r = 0, i;
4735
4736         gfx_v8_0_cp_compute_enable(adev, true);
4737
4738         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4739                 ring = &adev->gfx.compute_ring[i];
4740
4741                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4742                 if (unlikely(r != 0))
4743                         goto done;
4744                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4745                 if (!r) {
4746                         r = gfx_v8_0_kcq_init_queue(ring);
4747                         amdgpu_bo_kunmap(ring->mqd_obj);
4748                         ring->mqd_ptr = NULL;
4749                 }
4750                 amdgpu_bo_unreserve(ring->mqd_obj);
4751                 if (r)
4752                         goto done;
4753         }
4754
4755         gfx_v8_0_set_mec_doorbell_range(adev);
4756
4757         r = gfx_v8_0_kiq_kcq_enable(adev);
4758         if (r)
4759                 goto done;
4760
4761 done:
4762         return r;
4763 }
4764
4765 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4766 {
4767         int r, i;
4768         struct amdgpu_ring *ring;
4769
4770         /* collect all the ring_tests here, gfx, kiq, compute */
4771         ring = &adev->gfx.gfx_ring[0];
4772         r = amdgpu_ring_test_helper(ring);
4773         if (r)
4774                 return r;
4775
4776         ring = &adev->gfx.kiq.ring;
4777         r = amdgpu_ring_test_helper(ring);
4778         if (r)
4779                 return r;
4780
4781         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4782                 ring = &adev->gfx.compute_ring[i];
4783                 amdgpu_ring_test_helper(ring);
4784         }
4785
4786         return 0;
4787 }
4788
4789 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4790 {
4791         int r;
4792
4793         if (!(adev->flags & AMD_IS_APU))
4794                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4795
4796         r = gfx_v8_0_kiq_resume(adev);
4797         if (r)
4798                 return r;
4799
4800         r = gfx_v8_0_cp_gfx_resume(adev);
4801         if (r)
4802                 return r;
4803
4804         r = gfx_v8_0_kcq_resume(adev);
4805         if (r)
4806                 return r;
4807
4808         r = gfx_v8_0_cp_test_all_rings(adev);
4809         if (r)
4810                 return r;
4811
4812         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4813
4814         return 0;
4815 }
4816
/* Enable or disable both CP engines (gfx and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4822
4823 static int gfx_v8_0_hw_init(void *handle)
4824 {
4825         int r;
4826         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4827
4828         gfx_v8_0_init_golden_registers(adev);
4829         gfx_v8_0_constants_init(adev);
4830
4831         r = adev->gfx.rlc.funcs->resume(adev);
4832         if (r)
4833                 return r;
4834
4835         r = gfx_v8_0_cp_resume(adev);
4836
4837         return r;
4838 }
4839
4840 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4841 {
4842         int r, i;
4843         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4844
4845         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4846         if (r)
4847                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4848
4849         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4850                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4851
4852                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4853                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4854                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4855                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4856                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4857                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4858                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4859                 amdgpu_ring_write(kiq_ring, 0);
4860                 amdgpu_ring_write(kiq_ring, 0);
4861                 amdgpu_ring_write(kiq_ring, 0);
4862         }
4863         r = amdgpu_ring_test_helper(kiq_ring);
4864         if (r)
4865                 DRM_ERROR("KCQ disable failed\n");
4866
4867         return r;
4868 }
4869
4870 static bool gfx_v8_0_is_idle(void *handle)
4871 {
4872         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4873
4874         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4875                 || RREG32(mmGRBM_STATUS2) != 0x8)
4876                 return false;
4877         else
4878                 return true;
4879 }
4880
4881 static bool gfx_v8_0_rlc_is_idle(void *handle)
4882 {
4883         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4884
4885         if (RREG32(mmGRBM_STATUS2) != 0x8)
4886                 return false;
4887         else
4888                 return true;
4889 }
4890
4891 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4892 {
4893         unsigned int i;
4894         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4895
4896         for (i = 0; i < adev->usec_timeout; i++) {
4897                 if (gfx_v8_0_rlc_is_idle(handle))
4898                         return 0;
4899
4900                 udelay(1);
4901         }
4902         return -ETIMEDOUT;
4903 }
4904
4905 static int gfx_v8_0_wait_for_idle(void *handle)
4906 {
4907         unsigned int i;
4908         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4909
4910         for (i = 0; i < adev->usec_timeout; i++) {
4911                 if (gfx_v8_0_is_idle(handle))
4912                         return 0;
4913
4914                 udelay(1);
4915         }
4916         return -ETIMEDOUT;
4917 }
4918
/* IP-block hw_fini callback: release the gfx interrupts, unmap the
 * compute queues and, on bare metal only, halt the CP and RLC once
 * they are idle.  Always returns 0; halt failures are only logged.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	gfx_v8_0_kcq_disable(adev);

	/* under SRIOV the host owns CP/RLC state; stop here */
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	/* halt CP/RLC only when idle — halting a busy engine is skipped
	 * (NOTE(review): presumably to avoid hanging the teardown path;
	 * the error is not propagated). */
	amdgpu_gfx_rlc_enter_safe_mode(adev);
	if (!gfx_v8_0_wait_for_idle(adev))
		gfx_v8_0_cp_enable(adev, false);
	else
		pr_err("cp is busy, skip halt cp\n");
	if (!gfx_v8_0_wait_for_rlc_idle(adev))
		adev->gfx.rlc.funcs->stop(adev);
	else
		pr_err("rlc is busy, skip halt rlc\n");
	amdgpu_gfx_rlc_exit_safe_mode(adev);

	return 0;
}
4950
/* IP-block suspend callback: identical to hw_fini for this block. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4955
/* IP-block resume callback: identical to hw_init for this block. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4960
4961 static bool gfx_v8_0_check_soft_reset(void *handle)
4962 {
4963         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4964         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4965         u32 tmp;
4966
4967         /* GRBM_STATUS */
4968         tmp = RREG32(mmGRBM_STATUS);
4969         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4970                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4971                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4972                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4973                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4974                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4975                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4976                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4977                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4978                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4979                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4980                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4981                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4982         }
4983
4984         /* GRBM_STATUS2 */
4985         tmp = RREG32(mmGRBM_STATUS2);
4986         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4987                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4988                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4989
4990         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4991             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4992             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4993                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4994                                                 SOFT_RESET_CPF, 1);
4995                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4996                                                 SOFT_RESET_CPC, 1);
4997                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4998                                                 SOFT_RESET_CPG, 1);
4999                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5000                                                 SOFT_RESET_GRBM, 1);
5001         }
5002
5003         /* SRBM_STATUS */
5004         tmp = RREG32(mmSRBM_STATUS);
5005         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5006                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5007                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5008         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5009                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5010                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5011
5012         if (grbm_soft_reset || srbm_soft_reset) {
5013                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5014                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5015                 return true;
5016         } else {
5017                 adev->gfx.grbm_soft_reset = 0;
5018                 adev->gfx.srbm_soft_reset = 0;
5019                 return false;
5020         }
5021 }
5022
5023 static int gfx_v8_0_pre_soft_reset(void *handle)
5024 {
5025         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5026         u32 grbm_soft_reset = 0;
5027
5028         if ((!adev->gfx.grbm_soft_reset) &&
5029             (!adev->gfx.srbm_soft_reset))
5030                 return 0;
5031
5032         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5033
5034         /* stop the rlc */
5035         adev->gfx.rlc.funcs->stop(adev);
5036
5037         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5038             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5039                 /* Disable GFX parsing/prefetching */
5040                 gfx_v8_0_cp_gfx_enable(adev, false);
5041
5042         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5043             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5044             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5045             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5046                 int i;
5047
5048                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5049                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5050
5051                         mutex_lock(&adev->srbm_mutex);
5052                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5053                         gfx_v8_0_deactivate_hqd(adev, 2);
5054                         vi_srbm_select(adev, 0, 0, 0, 0);
5055                         mutex_unlock(&adev->srbm_mutex);
5056                 }
5057                 /* Disable MEC parsing/prefetching */
5058                 gfx_v8_0_cp_compute_enable(adev, false);
5059         }
5060
5061        return 0;
5062 }
5063
/* IP-block soft_reset callback: pulse the GRBM/SRBM soft-reset bits
 * that check_soft_reset cached, bracketing the pulse with a GMCON
 * GFX stall/clear.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall/clear GFX in GMCON before pulsing the reset */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		/* read back — presumably to post the write before the
		 * delay (NOTE(review): confirm intent) */
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		/* release the reset bits again */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* un-stall GFX in GMCON */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5125
/* IP-block post_soft_reset callback: after the reset pulse, bring the
 * affected engines back — deactivate stale compute HQDs, resume KIQ,
 * compute and gfx CP as needed, re-test all rings and restart the RLC.
 * Always returns 0 (NOTE(review): resume/test failures are ignored).
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* deactivate every compute HQD under the SRBM lock */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
		gfx_v8_0_kcq_resume(adev);
	}

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	gfx_v8_0_cp_test_all_rings(adev);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
5166
5167 /**
5168  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5169  *
5170  * @adev: amdgpu_device pointer
5171  *
5172  * Fetches a GPU clock counter snapshot.
5173  * Returns the 64 bit clock counter snapshot.
5174  */
5175 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5176 {
5177         uint64_t clock;
5178
5179         mutex_lock(&adev->gfx.gpu_clock_mutex);
5180         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5181         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5182                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5183         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5184         return clock;
5185 }
5186
5187 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5188                                           uint32_t vmid,
5189                                           uint32_t gds_base, uint32_t gds_size,
5190                                           uint32_t gws_base, uint32_t gws_size,
5191                                           uint32_t oa_base, uint32_t oa_size)
5192 {
5193         /* GDS Base */
5194         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5195         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5196                                 WRITE_DATA_DST_SEL(0)));
5197         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5198         amdgpu_ring_write(ring, 0);
5199         amdgpu_ring_write(ring, gds_base);
5200
5201         /* GDS Size */
5202         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5203         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5204                                 WRITE_DATA_DST_SEL(0)));
5205         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5206         amdgpu_ring_write(ring, 0);
5207         amdgpu_ring_write(ring, gds_size);
5208
5209         /* GWS */
5210         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5211         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5212                                 WRITE_DATA_DST_SEL(0)));
5213         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5214         amdgpu_ring_write(ring, 0);
5215         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5216
5217         /* OA */
5218         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5219         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5220                                 WRITE_DATA_DST_SEL(0)));
5221         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5222         amdgpu_ring_write(ring, 0);
5223         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5224 }
5225
5226 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5227 {
5228         WREG32(mmSQ_IND_INDEX,
5229                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5230                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5231                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5232                 (SQ_IND_INDEX__FORCE_READ_MASK));
5233         return RREG32(mmSQ_IND_DATA);
5234 }
5235
5236 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5237                            uint32_t wave, uint32_t thread,
5238                            uint32_t regno, uint32_t num, uint32_t *out)
5239 {
5240         WREG32(mmSQ_IND_INDEX,
5241                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5242                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5243                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5244                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5245                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5246                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5247         while (num--)
5248                 *(out++) = RREG32(mmSQ_IND_DATA);
5249 }
5250
5251 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5252 {
5253         /* type 0 wave data */
5254         dst[(*no_fields)++] = 0;
5255         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5256         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5257         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5258         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5259         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5260         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5261         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5262         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5263         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5264         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5265         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5266         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5267         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5268         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5269         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5270         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5271         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5272         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5273 }
5274
5275 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5276                                      uint32_t wave, uint32_t start,
5277                                      uint32_t size, uint32_t *dst)
5278 {
5279         wave_read_regs(
5280                 adev, simd, wave, 0,
5281                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5282 }
5283
5284
/* GFX IP callbacks consumed by common amdgpu_gfx code (clock counter
 * reads, SE/SH selection, debugfs wave dumps). */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5292
/* IP-block early_init callback: set ring counts and install the
 * gfx/ring/irq/gds/rlc function tables before any hardware access.
 * Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	/* amdgpu_num_kcq: externally configured KCQ count — presumably
	 * the num_kcq module option; TODO confirm */
	adev->gfx.num_compute_rings = amdgpu_num_kcq;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5307
/* IP-block late_init callback: enable the gfx interrupt sources and
 * run the EDC GPR workarounds (which need the IB pool, hence late
 * init).  Returns 0 on success or the first failing step's error.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	return 0;
}
5342
5343 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5344                                                        bool enable)
5345 {
5346         if ((adev->asic_type == CHIP_POLARIS11) ||
5347             (adev->asic_type == CHIP_POLARIS12) ||
5348             (adev->asic_type == CHIP_VEGAM))
5349                 /* Send msg to SMU via Powerplay */
5350                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5351
5352         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5353 }
5354
/* Toggle dynamic per-CU medium-grain power gating in RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5360
/* Toggle Polaris11-family quick medium-grain power gating. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5366
/* Toggle coarse-grain GFX power gating (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5372
/* Toggle GFX pipeline power gating (Carrizo/Stoney). */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5382
5383 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5384                                           bool enable)
5385 {
5386         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5387                 cz_enable_gfx_cg_power_gating(adev, true);
5388                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5389                         cz_enable_gfx_pipeline_power_gating(adev, true);
5390         } else {
5391                 cz_enable_gfx_cg_power_gating(adev, false);
5392                 cz_enable_gfx_pipeline_power_gating(adev, false);
5393         }
5394 }
5395
/* IP-block set_powergating_state callback: program the per-ASIC gfx
 * powergating features; state == AMD_PG_STATE_GATE enables gating.
 * No-op under SRIOV.  Always returns 0.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	/* reprogramming PG features is bracketed by RLC safe mode when
	 * any of these features is supported */
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down follows RLC_SMU_HS support, regardless of
		 * the requested gate state */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_exit_safe_mode(adev);
	return 0;
}
5466
/* IP-block get_clockgating_state callback: report which gfx
 * clockgating features are currently active by reading the relevant
 * override/control registers and OR-ing flags into *@flags.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* NOTE(review): under SRIOV *flags is cleared but the register
	 * reads below still run — confirm whether an early return was
	 * intended here */
	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGLG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5508
/**
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM command over the RLC serdes bus
 * @adev: amdgpu device
 * @reg_addr: BPM register address to target (e.g. BPM_REG_MGCG_OVERRIDE)
 * @cmd: serdes command (SET_BPM_SERDES_CMD / CLE_BPM_SERDES_CMD)
 *
 * Selects all SEs/SHs, addresses every CU and non-CU master, then programs
 * RLC_SERDES_WR_CTRL with the command/address fields. Stoney's register
 * layout lacks the BPM_DATA/REG_ADDR fields cleared on other ASICs, hence
 * the two mask variants.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every shader engine / shader array */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	/* address all CU and non-CU serdes masters */
	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* encode command, target register and all-BPM address (0xff) */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5549
/* RLC safe-mode handshake messages and the RLC_GPR_REG2 request/message
 * field layout used for that handshake (not provided by the sh_mask headers).
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5556
5557 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5558 {
5559         uint32_t rlc_setting;
5560
5561         rlc_setting = RREG32(mmRLC_CNTL);
5562         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5563                 return false;
5564
5565         return true;
5566 }
5567
/**
 * gfx_v8_0_set_safe_mode - request RLC safe mode and wait for it to take effect
 * @adev: amdgpu device
 *
 * Writes CMD=1 / MESSAGE=1 (enter) to RLC_SAFE_MODE, then polls until the
 * GFX clock and power status bits report up, and finally until the RLC
 * clears the CMD bit to acknowledge the request. Both polls are bounded by
 * adev->usec_timeout; a timeout is silently tolerated.
 */
static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;
	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE: GFX clock/power must both be up */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPM_STAT) &
		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
			break;
		udelay(1);
	}
	/* wait for the RLC to ack the request by clearing CMD */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5594
/**
 * gfx_v8_0_unset_safe_mode - request RLC safe-mode exit and wait for the ack
 * @adev: amdgpu device
 *
 * Writes CMD=1 with MESSAGE=0 (exit) to RLC_SAFE_MODE, then polls until the
 * RLC clears the CMD bit, bounded by adev->usec_timeout.
 */
static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	WREG32(mmRLC_SAFE_MODE, data);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5611
5612 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5613 {
5614         u32 data;
5615
5616         if (amdgpu_sriov_is_pp_one_vf(adev))
5617                 data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5618         else
5619                 data = RREG32(mmRLC_SPM_VMID);
5620
5621         data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5622         data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5623
5624         if (amdgpu_sriov_is_pp_one_vf(adev))
5625                 WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5626         else
5627                 WREG32(mmRLC_SPM_VMID, data);
5628 }
5629
/* RLC ops table shared by all GFX v8 ASICs (named after Iceland/Topaz). */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
	.set_safe_mode = gfx_v8_0_set_safe_mode,
	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
	.init = gfx_v8_0_rlc_init,
	.get_csb_size = gfx_v8_0_get_csb_size,
	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
	.resume = gfx_v8_0_rlc_resume,
	.stop = gfx_v8_0_rlc_stop,
	.reset = gfx_v8_0_rlc_reset,
	.start = gfx_v8_0_rlc_start,
	.update_spm_vmid = gfx_v8_0_update_spm_vmid
};
5644
/**
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 * @adev: amdgpu device
 * @enable: true to enable the features allowed by adev->cg_flags,
 *          false to force them all off
 *
 * Runs under RLC safe mode. The numbered steps below follow the required
 * hardware programming order; do not reorder them. The enable path only
 * turns on features whose AMD_CG_SUPPORT_* bit is set; the disable path
 * unconditionally forces everything off.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear override bits so MGCG can
		 * engage; APUs keep the GRBM override set */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* CGTS LS additionally requires both MGLS and CGTS_LS */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5748
/**
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device
 * @enable: true to enable CGCG (and CGLS if supported), false to disable both
 *
 * Runs under RLC safe mode; the serdes waits between steps are part of the
 * required hardware sequence and must not be reordered. GUI idle interrupts
 * are re-enabled on both paths (the PG code depends on them).
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG: force both CGCG and CGLS overrides on */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg (repeated reads are
		 * intentional) */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5841 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5842                                             bool enable)
5843 {
5844         if (enable) {
5845                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5846                  * ===  MGCG + MGLS + TS(CG/LS) ===
5847                  */
5848                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5849                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5850         } else {
5851                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5852                  * ===  CGCG + CGLS ===
5853                  */
5854                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5855                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5856         }
5857         return 0;
5858 }
5859
5860 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5861                                           enum amd_clockgating_state state)
5862 {
5863         uint32_t msg_id, pp_state = 0;
5864         uint32_t pp_support_state = 0;
5865
5866         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5867                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5868                         pp_support_state = PP_STATE_SUPPORT_LS;
5869                         pp_state = PP_STATE_LS;
5870                 }
5871                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5872                         pp_support_state |= PP_STATE_SUPPORT_CG;
5873                         pp_state |= PP_STATE_CG;
5874                 }
5875                 if (state == AMD_CG_STATE_UNGATE)
5876                         pp_state = 0;
5877
5878                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5879                                 PP_BLOCK_GFX_CG,
5880                                 pp_support_state,
5881                                 pp_state);
5882                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5883         }
5884
5885         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5886                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5887                         pp_support_state = PP_STATE_SUPPORT_LS;
5888                         pp_state = PP_STATE_LS;
5889                 }
5890
5891                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5892                         pp_support_state |= PP_STATE_SUPPORT_CG;
5893                         pp_state |= PP_STATE_CG;
5894                 }
5895
5896                 if (state == AMD_CG_STATE_UNGATE)
5897                         pp_state = 0;
5898
5899                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5900                                 PP_BLOCK_GFX_MG,
5901                                 pp_support_state,
5902                                 pp_state);
5903                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5904         }
5905
5906         return 0;
5907 }
5908
5909 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5910                                           enum amd_clockgating_state state)
5911 {
5912
5913         uint32_t msg_id, pp_state = 0;
5914         uint32_t pp_support_state = 0;
5915
5916         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5917                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5918                         pp_support_state = PP_STATE_SUPPORT_LS;
5919                         pp_state = PP_STATE_LS;
5920                 }
5921                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5922                         pp_support_state |= PP_STATE_SUPPORT_CG;
5923                         pp_state |= PP_STATE_CG;
5924                 }
5925                 if (state == AMD_CG_STATE_UNGATE)
5926                         pp_state = 0;
5927
5928                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5929                                 PP_BLOCK_GFX_CG,
5930                                 pp_support_state,
5931                                 pp_state);
5932                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5933         }
5934
5935         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5936                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5937                         pp_support_state = PP_STATE_SUPPORT_LS;
5938                         pp_state = PP_STATE_LS;
5939                 }
5940                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5941                         pp_support_state |= PP_STATE_SUPPORT_CG;
5942                         pp_state |= PP_STATE_CG;
5943                 }
5944                 if (state == AMD_CG_STATE_UNGATE)
5945                         pp_state = 0;
5946
5947                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5948                                 PP_BLOCK_GFX_3D,
5949                                 pp_support_state,
5950                                 pp_state);
5951                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5952         }
5953
5954         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5955                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5956                         pp_support_state = PP_STATE_SUPPORT_LS;
5957                         pp_state = PP_STATE_LS;
5958                 }
5959
5960                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5961                         pp_support_state |= PP_STATE_SUPPORT_CG;
5962                         pp_state |= PP_STATE_CG;
5963                 }
5964
5965                 if (state == AMD_CG_STATE_UNGATE)
5966                         pp_state = 0;
5967
5968                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5969                                 PP_BLOCK_GFX_MG,
5970                                 pp_support_state,
5971                                 pp_state);
5972                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5973         }
5974
5975         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5976                 pp_support_state = PP_STATE_SUPPORT_LS;
5977
5978                 if (state == AMD_CG_STATE_UNGATE)
5979                         pp_state = 0;
5980                 else
5981                         pp_state = PP_STATE_LS;
5982
5983                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5984                                 PP_BLOCK_GFX_RLC,
5985                                 pp_support_state,
5986                                 pp_state);
5987                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5988         }
5989
5990         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5991                 pp_support_state = PP_STATE_SUPPORT_LS;
5992
5993                 if (state == AMD_CG_STATE_UNGATE)
5994                         pp_state = 0;
5995                 else
5996                         pp_state = PP_STATE_LS;
5997                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5998                         PP_BLOCK_GFX_CP,
5999                         pp_support_state,
6000                         pp_state);
6001                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6002         }
6003
6004         return 0;
6005 }
6006
6007 static int gfx_v8_0_set_clockgating_state(void *handle,
6008                                           enum amd_clockgating_state state)
6009 {
6010         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6011
6012         if (amdgpu_sriov_vf(adev))
6013                 return 0;
6014
6015         switch (adev->asic_type) {
6016         case CHIP_FIJI:
6017         case CHIP_CARRIZO:
6018         case CHIP_STONEY:
6019                 gfx_v8_0_update_gfx_clock_gating(adev,
6020                                                  state == AMD_CG_STATE_GATE);
6021                 break;
6022         case CHIP_TONGA:
6023                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6024                 break;
6025         case CHIP_POLARIS10:
6026         case CHIP_POLARIS11:
6027         case CHIP_POLARIS12:
6028         case CHIP_VEGAM:
6029                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6030                 break;
6031         default:
6032                 break;
6033         }
6034         return 0;
6035 }
6036
6037 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6038 {
6039         return ring->adev->wb.wb[ring->rptr_offs];
6040 }
6041
6042 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6043 {
6044         struct amdgpu_device *adev = ring->adev;
6045
6046         if (ring->use_doorbell)
6047                 /* XXX check if swapping is necessary on BE */
6048                 return ring->adev->wb.wb[ring->wptr_offs];
6049         else
6050                 return RREG32(mmCP_RB0_WPTR);
6051 }
6052
6053 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6054 {
6055         struct amdgpu_device *adev = ring->adev;
6056
6057         if (ring->use_doorbell) {
6058                 /* XXX check if swapping is necessary on BE */
6059                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6060                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6061         } else {
6062                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6063                 (void)RREG32(mmCP_RB0_WPTR);
6064         }
6065 }
6066
/**
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP cache flush on a ring
 * @ring: ring to emit on
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the per-client done bit equals the request,
 * flushing the Host Data Path cache. Compute/KIQ rings select the done
 * bit by MEC (ring->me) and pipe; GFX rings use CP0 and run the wait on
 * the PFP engine. Unknown MEs emit nothing.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6099
/**
 * gfx_v8_0_ring_emit_vgt_flush - emit a VGT flush sequence on a ring
 * @ring: ring to emit on
 *
 * Emits a VS partial flush followed by a VGT flush event, draining the
 * vertex geometry pipeline.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6110
/**
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the GFX ring
 * @ring: ring to emit on
 * @job: job the IB belongs to (supplies the VMID; may be NULL)
 * @ib: indirect buffer to schedule
 * @flags: emit flags (unused here; IB flags are read from @ib)
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for CE IBs) packet
 * pointing at the IB. For preemptible SR-IOV DE IBs with a VMID, marks the
 * IB preemption-enabled and emits the DE metadata first.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
					struct amdgpu_job *job,
					struct amdgpu_ib *ib,
					uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* byte-swap the IB on big-endian hosts */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6142
/**
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring
 * @ring: ring to emit on
 * @job: job the IB belongs to (supplies the VMID; may be NULL)
 * @ib: indirect buffer to schedule
 * @flags: emit flags (unused here; IB flags are read from @ib)
 *
 * Emits an INDIRECT_BUFFER packet pointing at the IB, optionally preceded
 * by the GDS wave-ID reset workaround described below.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |	/* byte-swap the IB on big-endian hosts */
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6176
/**
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the GFX ring
 * @ring: ring to emit on
 * @addr: GPU address to write the sequence number to
 * @seq: sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_64BIT for a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT to raise an interrupt on completion
 *
 * Emits two EVENT_WRITE_EOP packets: a dummy one writing seq-1 (cache
 * flush workaround) followed by the real fence writing @seq. The dummy
 * EOP must stay — removing it reintroduces the cache flush problem.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
	amdgpu_ring_write(ring, upper_32_bits(seq - 1));

	/* Then send the real EOP event down the pipe:
	 * EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6213
/**
 * gfx_v8_0_ring_emit_pipeline_sync - wait for this ring's latest fence
 * @ring: ring to emit on (gfx or compute)
 *
 * Emits a WAIT_REG_MEM that polls the ring's own fence memory until it
 * equals sync_seq, stalling following commands until previously
 * submitted work has signaled. On the gfx ring the wait runs on the
 * PFP engine, otherwise on ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;

        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
                                 WAIT_REG_MEM_FUNCTION(3) | /* equal */
                                 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
        amdgpu_ring_write(ring, seq);
        amdgpu_ring_write(ring, 0xffffffff);
        amdgpu_ring_write(ring, 4); /* poll interval */
}
6230
/**
 * gfx_v8_0_ring_emit_vm_flush - flush the GPU TLB for a VM id
 * @ring: ring to emit on (gfx or compute)
 * @vmid: VM id whose page-table base changed
 * @pd_addr: new page-directory base address
 *
 * Emits the common GMC TLB-flush sequence, then a WAIT_REG_MEM on
 * VM_INVALIDATE_REQUEST with an always-true compare (function 0), i.e.
 * a fixed-interval wait for the invalidate to land. On the gfx ring a
 * PFP_SYNC_ME follows so the prefetch parser does not read through
 * stale translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                        unsigned vmid, uint64_t pd_addr)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

        amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

        /* wait for the invalidate to complete */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
                                 WAIT_REG_MEM_ENGINE(0))); /* me */
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0); /* ref */
        amdgpu_ring_write(ring, 0); /* mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */

        /* compute doesn't have PFP */
        if (usepfp) {
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
        }
}
6256
6257 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6258 {
6259         return ring->adev->wb.wb[ring->wptr_offs];
6260 }
6261
6262 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6263 {
6264         struct amdgpu_device *adev = ring->adev;
6265
6266         /* XXX check if swapping is necessary on BE */
6267         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6268         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6269 }
6270
/**
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 * @ring: compute ring
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_* bits (64-bit data select, interrupt request)
 *
 * Compute queues signal fences with a RELEASE_MEM packet instead of
 * EVENT_WRITE_EOP: it flushes TCL1/TC caches, writes @seq (32 or 64
 * bit per @flags) to @addr and optionally raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                             u64 addr, u64 seq,
                                             unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));
}
6291
/**
 * gfx_v8_0_ring_emit_fence_kiq - emit a fence from the KIQ ring
 * @ring: KIQ ring
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence number (32 bit only)
 * @flags: AMDGPU_FENCE_FLAG_* bits
 *
 * KIQ fences are plain WRITE_DATA packets to memory rather than EOP
 * events. Only 32-bit sequence values are supported because the
 * writeback slot is 32 bits wide (hence the BUG_ON). If an interrupt
 * is requested, CPC_INT_STATUS is written to trigger it.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned int flags)
{
        /* we only allocate 32bit for each seq wb address */
        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        /* write fence seq to the "addr" */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));

        if (flags & AMDGPU_FENCE_FLAG_INT) {
                /* set register to trigger INT */
                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
                amdgpu_ring_write(ring, 0);
                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
        }
}
6316
/* Emit a SWITCH_BUFFER packet on the gfx ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
        amdgpu_ring_write(ring, 0);
}
6322
/**
 * gfx_v8_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet
 * @ring: gfx ring
 * @flags: AMDGPU_HAVE_CTX_SWITCH / AMDGPU_PREAMBLE_IB_PRESENT* bits
 *
 * Builds the load-enable dword for CONTEXT_CONTROL from the submission
 * flags: on a context switch the VGT is flushed first and the global,
 * per-context and shader register load bits are set; CE RAM load is
 * requested when a preamble IB is present. Under SR-IOV the CE
 * metadata is emitted before the packet.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
        uint32_t dw2 = 0;

        if (amdgpu_sriov_vf(ring->adev))
                gfx_v8_0_ring_emit_ce_meta(ring);

        dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
                gfx_v8_0_ring_emit_vgt_flush(ring);
                /* set load_global_config & load_global_uconfig */
                dw2 |= 0x8001;
                /* set load_cs_sh_regs */
                dw2 |= 0x01000000;
                /* set load_per_context_state & load_gfx_sh_regs for GFX */
                dw2 |= 0x10002;

                /* set load_ce_ram if preamble presented */
                if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
                        dw2 |= 0x10000000;
        } else {
                /* still load_ce_ram if this is the first time preamble presented
                 * although there is no context switch happens.
                 */
                if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
                        dw2 |= 0x10000000;
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, dw2);
        amdgpu_ring_write(ring, 0);
}
6355
/**
 * gfx_v8_0_ring_emit_init_cond_exec - open a conditional execution block
 * @ring: gfx ring
 *
 * Emits a COND_EXEC packet whose dword count is a 0x55aa55aa
 * placeholder, to be patched later by
 * gfx_v8_0_ring_emit_patch_cond_exec(). Returns the ring offset of the
 * placeholder dword.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
        unsigned ret;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
        ret = ring->wptr & ring->buf_mask;
        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
        return ret;
}
6368
/**
 * gfx_v8_0_ring_emit_patch_cond_exec - patch a COND_EXEC dword count
 * @ring: gfx ring
 * @offset: ring offset returned by gfx_v8_0_ring_emit_init_cond_exec()
 *
 * Replaces the 0x55aa55aa placeholder at @offset with the number of
 * dwords emitted since it, so the COND_EXEC can skip exactly that
 * span. The else branch covers a write pointer that wrapped around the
 * ring buffer in the meantime.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
        unsigned cur;

        BUG_ON(offset > ring->buf_mask);
        BUG_ON(ring->ring[offset] != 0x55aa55aa);

        /* index of the last dword written, masked to the buffer size */
        cur = (ring->wptr & ring->buf_mask) - 1;
        if (likely(cur > offset))
                ring->ring[offset] = cur - offset;
        else
                ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6382
/**
 * gfx_v8_0_ring_emit_rreg - read a register into writeback memory
 * @ring: ring to emit on
 * @reg: register offset to read
 * @reg_val_offs: dword index into adev->wb where the value is stored
 *
 * Emits a COPY_DATA packet copying @reg into the writeback buffer so
 * the CPU can fetch the value afterwards.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
                                    uint32_t reg_val_offs)
{
        struct amdgpu_device *adev = ring->adev;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
                                (5 << 8) |      /* dst: memory */
                                (1 << 20));     /* write confirm */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
                                reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
                                reg_val_offs * 4));
}
6399
6400 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6401                                   uint32_t val)
6402 {
6403         uint32_t cmd;
6404
6405         switch (ring->funcs->type) {
6406         case AMDGPU_RING_TYPE_GFX:
6407                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6408                 break;
6409         case AMDGPU_RING_TYPE_KIQ:
6410                 cmd = 1 << 16; /* no inc addr */
6411                 break;
6412         default:
6413                 cmd = WR_CONFIRM;
6414                 break;
6415         }
6416
6417         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6418         amdgpu_ring_write(ring, cmd);
6419         amdgpu_ring_write(ring, reg);
6420         amdgpu_ring_write(ring, 0);
6421         amdgpu_ring_write(ring, val);
6422 }
6423
6424 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6425 {
6426         struct amdgpu_device *adev = ring->adev;
6427         uint32_t value = 0;
6428
6429         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6430         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6431         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6432         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6433         WREG32(mmSQ_CMD, value);
6434 }
6435
6436 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6437                                                  enum amdgpu_interrupt_state state)
6438 {
6439         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6440                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6441 }
6442
6443 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6444                                                      int me, int pipe,
6445                                                      enum amdgpu_interrupt_state state)
6446 {
6447         u32 mec_int_cntl, mec_int_cntl_reg;
6448
6449         /*
6450          * amdgpu controls only the first MEC. That's why this function only
6451          * handles the setting of interrupts for this specific MEC. All other
6452          * pipes' interrupts are set by amdkfd.
6453          */
6454
6455         if (me == 1) {
6456                 switch (pipe) {
6457                 case 0:
6458                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6459                         break;
6460                 case 1:
6461                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6462                         break;
6463                 case 2:
6464                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6465                         break;
6466                 case 3:
6467                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6468                         break;
6469                 default:
6470                         DRM_DEBUG("invalid pipe %d\n", pipe);
6471                         return;
6472                 }
6473         } else {
6474                 DRM_DEBUG("invalid me %d\n", me);
6475                 return;
6476         }
6477
6478         switch (state) {
6479         case AMDGPU_IRQ_STATE_DISABLE:
6480                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6481                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6482                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6483                 break;
6484         case AMDGPU_IRQ_STATE_ENABLE:
6485                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6486                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6487                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6488                 break;
6489         default:
6490                 break;
6491         }
6492 }
6493
6494 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6495                                              struct amdgpu_irq_src *source,
6496                                              unsigned type,
6497                                              enum amdgpu_interrupt_state state)
6498 {
6499         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6500                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6501
6502         return 0;
6503 }
6504
6505 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6506                                               struct amdgpu_irq_src *source,
6507                                               unsigned type,
6508                                               enum amdgpu_interrupt_state state)
6509 {
6510         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6511                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6512
6513         return 0;
6514 }
6515
6516 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6517                                             struct amdgpu_irq_src *src,
6518                                             unsigned type,
6519                                             enum amdgpu_interrupt_state state)
6520 {
6521         switch (type) {
6522         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6523                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6524                 break;
6525         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6526                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6527                 break;
6528         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6529                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6530                 break;
6531         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6532                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6533                 break;
6534         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6535                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6536                 break;
6537         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6538                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6539                 break;
6540         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6541                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6542                 break;
6543         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6544                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6545                 break;
6546         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6547                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6548                 break;
6549         default:
6550                 break;
6551         }
6552         return 0;
6553 }
6554
6555 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6556                                          struct amdgpu_irq_src *source,
6557                                          unsigned int type,
6558                                          enum amdgpu_interrupt_state state)
6559 {
6560         int enable_flag;
6561
6562         switch (state) {
6563         case AMDGPU_IRQ_STATE_DISABLE:
6564                 enable_flag = 0;
6565                 break;
6566
6567         case AMDGPU_IRQ_STATE_ENABLE:
6568                 enable_flag = 1;
6569                 break;
6570
6571         default:
6572                 return -EINVAL;
6573         }
6574
6575         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6576         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6577         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6578         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6579         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6580         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6581                      enable_flag);
6582         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6583                      enable_flag);
6584         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6585                      enable_flag);
6586         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6587                      enable_flag);
6588         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6589                      enable_flag);
6590         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6591                      enable_flag);
6592         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6593                      enable_flag);
6594         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6595                      enable_flag);
6596
6597         return 0;
6598 }
6599
6600 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6601                                      struct amdgpu_irq_src *source,
6602                                      unsigned int type,
6603                                      enum amdgpu_interrupt_state state)
6604 {
6605         int enable_flag;
6606
6607         switch (state) {
6608         case AMDGPU_IRQ_STATE_DISABLE:
6609                 enable_flag = 1;
6610                 break;
6611
6612         case AMDGPU_IRQ_STATE_ENABLE:
6613                 enable_flag = 0;
6614                 break;
6615
6616         default:
6617                 return -EINVAL;
6618         }
6619
6620         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6621                      enable_flag);
6622
6623         return 0;
6624 }
6625
6626 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6627                             struct amdgpu_irq_src *source,
6628                             struct amdgpu_iv_entry *entry)
6629 {
6630         int i;
6631         u8 me_id, pipe_id, queue_id;
6632         struct amdgpu_ring *ring;
6633
6634         DRM_DEBUG("IH: CP EOP\n");
6635         me_id = (entry->ring_id & 0x0c) >> 2;
6636         pipe_id = (entry->ring_id & 0x03) >> 0;
6637         queue_id = (entry->ring_id & 0x70) >> 4;
6638
6639         switch (me_id) {
6640         case 0:
6641                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6642                 break;
6643         case 1:
6644         case 2:
6645                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6646                         ring = &adev->gfx.compute_ring[i];
6647                         /* Per-queue interrupt is supported for MEC starting from VI.
6648                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6649                           */
6650                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6651                                 amdgpu_fence_process(ring);
6652                 }
6653                 break;
6654         }
6655         return 0;
6656 }
6657
6658 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6659                            struct amdgpu_iv_entry *entry)
6660 {
6661         u8 me_id, pipe_id, queue_id;
6662         struct amdgpu_ring *ring;
6663         int i;
6664
6665         me_id = (entry->ring_id & 0x0c) >> 2;
6666         pipe_id = (entry->ring_id & 0x03) >> 0;
6667         queue_id = (entry->ring_id & 0x70) >> 4;
6668
6669         switch (me_id) {
6670         case 0:
6671                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6672                 break;
6673         case 1:
6674         case 2:
6675                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6676                         ring = &adev->gfx.compute_ring[i];
6677                         if (ring->me == me_id && ring->pipe == pipe_id &&
6678                             ring->queue == queue_id)
6679                                 drm_sched_fault(&ring->sched);
6680                 }
6681                 break;
6682         }
6683 }
6684
/* IRQ handler for privileged register access faults: log and hand the
 * fault to the owning ring's scheduler.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");
        gfx_v8_0_fault(adev, entry);
        return 0;
}
6693
/* IRQ handler for privileged/illegal instructions: log and hand the
 * fault to the owning ring's scheduler.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        gfx_v8_0_fault(adev, entry);
        return 0;
}
6702
/* IRQ handler for CP EDC/ECC errors: log only, no recovery attempted. */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
                                     struct amdgpu_irq_src *source,
                                     struct amdgpu_iv_entry *entry)
{
        /* printk messages must be newline-terminated, otherwise this
         * line can be merged with the next unrelated log line.
         */
        DRM_ERROR("CP EDC/ECC error detected.\n");
        return 0;
}
6710
6711 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6712 {
6713         u32 enc, se_id, sh_id, cu_id;
6714         char type[20];
6715         int sq_edc_source = -1;
6716
6717         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6718         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6719
6720         switch (enc) {
6721                 case 0:
6722                         DRM_INFO("SQ general purpose intr detected:"
6723                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6724                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6725                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6726                                         "wlt %d, thread_trace %d.\n",
6727                                         se_id,
6728                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6729                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6730                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6731                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6732                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6733                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6734                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6735                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6736                                         );
6737                         break;
6738                 case 1:
6739                 case 2:
6740
6741                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6742                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6743
6744                         /*
6745                          * This function can be called either directly from ISR
6746                          * or from BH in which case we can access SQ_EDC_INFO
6747                          * instance
6748                          */
6749                         if (in_task()) {
6750                                 mutex_lock(&adev->grbm_idx_mutex);
6751                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6752
6753                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6754
6755                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6756                                 mutex_unlock(&adev->grbm_idx_mutex);
6757                         }
6758
6759                         if (enc == 1)
6760                                 sprintf(type, "instruction intr");
6761                         else
6762                                 sprintf(type, "EDC/ECC error");
6763
6764                         DRM_INFO(
6765                                 "SQ %s detected: "
6766                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6767                                         "trap %s, sq_ed_info.source %s.\n",
6768                                         type, se_id, sh_id, cu_id,
6769                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6770                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6771                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6772                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6773                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6774                                 );
6775                         break;
6776                 default:
6777                         DRM_ERROR("SQ invalid encoding type\n.");
6778         }
6779 }
6780
/* Deferred (task-context) half of the SQ interrupt: lets
 * gfx_v8_0_parse_sq_irq() read SQ_EDC_INFO, which needs the GRBM
 * index mutex and so cannot run from the ISR.
 */
static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{

        /* both containers resolve from the same embedded work_struct:
         * the device via gfx.sq_work.work, the payload via sq_work */
        struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
        struct sq_work *sq_work = container_of(work, struct sq_work, work);

        gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
}
6789
6790 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6791                            struct amdgpu_irq_src *source,
6792                            struct amdgpu_iv_entry *entry)
6793 {
6794         unsigned ih_data = entry->src_data[0];
6795
6796         /*
6797          * Try to submit work so SQ_EDC_INFO can be accessed from
6798          * BH. If previous work submission hasn't finished yet
6799          * just print whatever info is possible directly from the ISR.
6800          */
6801         if (work_pending(&adev->gfx.sq_work.work)) {
6802                 gfx_v8_0_parse_sq_irq(adev, ih_data);
6803         } else {
6804                 adev->gfx.sq_work.ih_data = ih_data;
6805                 schedule_work(&adev->gfx.sq_work.work);
6806         }
6807
6808         return 0;
6809 }
6810
/* Emit a SURFACE_SYNC on the gfx ring that flushes/invalidates the
 * TCL1, TC and SH I/K caches over the full address range
 * (CP_COHER_SIZE = 0xffffffff, base 0).
 */
static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
        amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
                          PACKET3_TC_ACTION_ENA |
                          PACKET3_SH_KCACHE_ACTION_ENA |
                          PACKET3_SH_ICACHE_ACTION_ENA |
                          PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
        amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
        amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
        amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
}
6823
/* Compute-ring variant of the mem sync: ACQUIRE_MEM with the same
 * cache actions as gfx_v8_0_emit_mem_sync(), plus the 64-bit
 * CP_COHER_SIZE/BASE high dwords the packet requires.
 */
static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
        amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
                          PACKET3_TC_ACTION_ENA |
                          PACKET3_SH_KCACHE_ACTION_ENA |
                          PACKET3_SH_ICACHE_ACTION_ENA |
                          PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
        amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
        amdgpu_ring_write(ring, 0xff);          /* CP_COHER_SIZE_HI */
        amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
        amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
        amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */
}
6838
/* IP-block lifecycle callbacks for the GFX v8 block (init/fini,
 * suspend/resume, soft reset and clock/power gating control).
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
        .late_init = gfx_v8_0_late_init,
        .sw_init = gfx_v8_0_sw_init,
        .sw_fini = gfx_v8_0_sw_fini,
        .hw_init = gfx_v8_0_hw_init,
        .hw_fini = gfx_v8_0_hw_fini,
        .suspend = gfx_v8_0_suspend,
        .resume = gfx_v8_0_resume,
        .is_idle = gfx_v8_0_is_idle,
        .wait_for_idle = gfx_v8_0_wait_for_idle,
        .check_soft_reset = gfx_v8_0_check_soft_reset,
        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
        .soft_reset = gfx_v8_0_soft_reset,
        .post_soft_reset = gfx_v8_0_post_soft_reset,
        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
        .set_powergating_state = gfx_v8_0_set_powergating_state,
        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6859
/* Ring callbacks for the GFX ring. emit_frame_size is the worst-case
 * dword budget per frame; the per-item breakdown is annotated inline.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
        .type = AMDGPU_RING_TYPE_GFX,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
        .emit_frame_size = /* maximum 215dw if count 16 IBs in */
                5 +  /* COND_EXEC */
                7 +  /* PIPELINE_SYNC */
                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
                12 +  /* FENCE for VM_FLUSH */
                20 + /* GDS switch */
                4 + /* double SWITCH_BUFFER,
                       the first COND_EXEC jump to the place just
                           prior to this double SWITCH_BUFFER  */
                5 + /* COND_EXEC */
                7 +      /*     HDP_flush */
                4 +      /*     VGT_flush */
                14 + /* CE_META */
                31 + /* DE_META */
                3 + /* CNTX_CTRL */
                5 + /* HDP_INVL */
                12 + 12 + /* FENCE x2 */
                2 + /* SWITCH_BUFFER */
                5, /* SURFACE_SYNC */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_switch_buffer = gfx_v8_ring_emit_sb,
        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
        .soft_recovery = gfx_v8_0_ring_soft_recovery,
        .emit_mem_sync = gfx_v8_0_emit_mem_sync,
};
6906
/* Ring callbacks for the gfx v8 compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,			/* ring submission alignment mask */
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),	/* packet used to pad the ring */
	.support_64bit_ptrs = false,		/* rptr/wptr handled as 32-bit on VI */
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	/*
	 * Worst-case dword count of the per-frame boilerplate emitted around
	 * the IBs; each term matches one emit_* callback below.
	 */
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
		7, /* gfx_v8_0_emit_mem_sync_compute */
	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
};
6937
/*
 * Ring callbacks for the KIQ (kernel interface queue) ring.  A reduced
 * set compared to the compute rings: no IB emission, no VM flush — the
 * KIQ is driver-internal and mainly used for register access and queue
 * management (note the extra .emit_rreg hook).
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,			/* ring submission alignment mask */
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),	/* packet used to pad the ring */
	.support_64bit_ptrs = false,		/* rptr/wptr handled as 32-bit on VI */
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	/* Worst-case dword count of the per-frame boilerplate. */
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6961
6962 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6963 {
6964         int i;
6965
6966         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6967
6968         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6969                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6970
6971         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6972                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6973 }
6974
/* End-of-pipe (EOP) interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,	/* enable/disable the source */
	.process = gfx_v8_0_eop_irq,			/* handle a raised interrupt */
};
6979
/* Privileged register access fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,	/* enable/disable the source */
	.process = gfx_v8_0_priv_reg_irq,		/* handle a raised interrupt */
};
6984
/* Privileged instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,	/* enable/disable the source */
	.process = gfx_v8_0_priv_inst_irq,		/* handle a raised interrupt */
};
6989
/* CP ECC error interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,	/* enable/disable the source */
	.process = gfx_v8_0_cp_ecc_error_irq,	/* handle a raised interrupt */
};
6994
/* SQ (shader sequencer) interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,	/* enable/disable the source */
	.process = gfx_v8_0_sq_irq,		/* handle a raised interrupt */
};
6999
/*
 * Register all GFX-block interrupt sources (EOP, privileged reg/inst
 * faults, CP ECC errors, SQ) with the amdgpu interrupt framework.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* EOP interrupts are per-CP-ring, hence multiple types. */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
7017
/* All gfx v8 variants share the Iceland RLC function table. */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7022
/*
 * Initialize the ASIC GDS (global data share) bookkeeping from hardware
 * registers and fixed VI limits.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);	/* GDS size as reported by HW */
	adev->gds.gws_size = 64;	/* GWS (global wave sync) resource count */
	adev->gds.oa_size = 16;		/* OA (ordered append) resource count */
	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}
7031
7032 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7033                                                  u32 bitmap)
7034 {
7035         u32 data;
7036
7037         if (!bitmap)
7038                 return;
7039
7040         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7041         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7042
7043         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7044 }
7045
7046 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7047 {
7048         u32 data, mask;
7049
7050         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7051                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7052
7053         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7054
7055         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7056 }
7057
/*
 * Populate adev->gfx.cu_info by walking every shader engine / shader
 * array, reading the per-SH active-CU bitmap, and deriving the total
 * active CU count plus the "always on" (AO) CU mask.
 *
 * Touches the GRBM index registers, so it serializes on grbm_idx_mutex
 * and restores broadcast mode (0xffffffff selectors) when done.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];	/* one user disable mask per (SE, SH), 4 SEs x 2 SHs */
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the always-on CU count at 2; dGPUs allow a full SH. */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	/* Parse the amdgpu.disable_cu module parameter into disable_masks. */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* Point GRBM at this specific SE/SH pair. */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/*
			 * Count active CUs; the first ao_cu_num of them are
			 * flagged as always-on.
			 */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/*
			 * ao_cu_mask packs 8 bits per SH into a 32-bit word,
			 * so only SEs 0-1 fit here (vs the i < 4 bound used
			 * for disable_masks above) — presumably intentional;
			 * NOTE(review): confirm against consumers of
			 * ao_cu_mask.
			 */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* Restore GRBM broadcast mode. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	/* Fixed per-CU capabilities on gfx v8. */
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7113
/* GFX 8.0 IP block descriptor, referenced from the VI SoC setup code. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7122
/* GFX 8.1 IP block descriptor — same callbacks, bumped minor version. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7131
7132 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7133 {
7134         uint64_t ce_payload_addr;
7135         int cnt_ce;
7136         union {
7137                 struct vi_ce_ib_state regular;
7138                 struct vi_ce_ib_state_chained_ib chained;
7139         } ce_payload = {};
7140
7141         if (ring->adev->virt.chained_ib_support) {
7142                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7143                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7144                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7145         } else {
7146                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7147                         offsetof(struct vi_gfx_meta_data, ce_payload);
7148                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7149         }
7150
7151         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7152         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7153                                 WRITE_DATA_DST_SEL(8) |
7154                                 WR_CONFIRM) |
7155                                 WRITE_DATA_CACHE_POLICY(0));
7156         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7157         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7158         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7159 }
7160
7161 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7162 {
7163         uint64_t de_payload_addr, gds_addr, csa_addr;
7164         int cnt_de;
7165         union {
7166                 struct vi_de_ib_state regular;
7167                 struct vi_de_ib_state_chained_ib chained;
7168         } de_payload = {};
7169
7170         csa_addr = amdgpu_csa_vaddr(ring->adev);
7171         gds_addr = csa_addr + 4096;
7172         if (ring->adev->virt.chained_ib_support) {
7173                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7174                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7175                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7176                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7177         } else {
7178                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7179                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7180                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7181                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7182         }
7183
7184         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7185         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7186                                 WRITE_DATA_DST_SEL(8) |
7187                                 WR_CONFIRM) |
7188                                 WRITE_DATA_CACHE_POLICY(0));
7189         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7190         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7191         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7192 }
This page took 0.473609 seconds and 4 git commands to generate.