/*
 * Source: linux.git — drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
 * (captured via gitweb "blob" view; merge tag 'scsi-misc' of
 * git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi was the tip commit)
 */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "vi.h"
33 #include "vi_structs.h"
34 #include "vid.h"
35 #include "amdgpu_ucode.h"
36 #include "amdgpu_atombios.h"
37 #include "atombios_i2c.h"
38 #include "clearstate_vi.h"
39
40 #include "gmc/gmc_8_2_d.h"
41 #include "gmc/gmc_8_2_sh_mask.h"
42
43 #include "oss/oss_3_0_d.h"
44 #include "oss/oss_3_0_sh_mask.h"
45
46 #include "bif/bif_5_0_d.h"
47 #include "bif/bif_5_0_sh_mask.h"
48 #include "gca/gfx_8_0_d.h"
49 #include "gca/gfx_8_0_enum.h"
50 #include "gca/gfx_8_0_sh_mask.h"
51
52 #include "dce/dce_10_0_d.h"
53 #include "dce/dce_10_0_sh_mask.h"
54
55 #include "smu/smu_7_1_3_d.h"
56
57 #include "ivsrcid/ivsrcid_vislands30.h"
58
59 #define GFX8_NUM_GFX_RINGS     1
60 #define GFX8_MEC_HPD_SIZE 4096
61
62 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
63 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
64 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
65 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
66
67 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
68 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
69 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
70 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
71 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
72 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
73 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
74 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
75 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
76
77 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
78 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
79 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
80 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
81 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
82 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
83
84 /* BPM SERDES CMD */
85 #define SET_BPM_SERDES_CMD    1
86 #define CLE_BPM_SERDES_CMD    0
87
88 /* BPM Register Address*/
89 enum {
90         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
91         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
92         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
93         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
94         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
95         BPM_REG_FGCG_MAX
96 };
97
98 #define RLC_FormatDirectRegListLength        14
99
100 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
103 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
104 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
105 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
110 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
111 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
116 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
117 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
123 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
124 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
125
126 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
129 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
130 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
132
133 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
144
145 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
153 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
155 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
156
157 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
165 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
166 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
167 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
168
169 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
172 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
173 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
174 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
175
176 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
177 {
178         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
179         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
180         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
181         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
182         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
183         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
184         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
185         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
186         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
187         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
188         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
189         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
190         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
191         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
192         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
193         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
194 };
195
196 static const u32 golden_settings_tonga_a11[] =
197 {
198         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
199         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
200         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
201         mmGB_GPU_ID, 0x0000000f, 0x00000000,
202         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
203         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
204         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
205         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
206         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
207         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
208         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
209         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
210         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
211         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
212         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
213         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
214 };
215
216 static const u32 tonga_golden_common_all[] =
217 {
218         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
219         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
220         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
221         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
222         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
223         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
224         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
225         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
226 };
227
228 static const u32 tonga_mgcg_cgcg_init[] =
229 {
230         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
231         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
232         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
233         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
234         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
236         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
237         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
238         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
239         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
240         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
241         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
242         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
245         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
246         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
247         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
249         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
250         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
251         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
252         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
253         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
254         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
255         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
256         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
257         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
258         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
259         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
260         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
261         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
262         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
263         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
264         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
265         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
266         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
267         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
268         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
269         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
270         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
271         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
272         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
273         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
274         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
275         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
276         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
277         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
278         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
279         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
280         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
281         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
282         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
283         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
284         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
285         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
286         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
287         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
288         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
289         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
290         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
291         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
292         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
293         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
294         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
295         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
296         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
297         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
298         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
299         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
300         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
301         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
302         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
303         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
304         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
305 };
306
307 static const u32 golden_settings_vegam_a11[] =
308 {
309         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
310         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
311         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
312         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
313         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
314         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
315         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
316         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
317         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
318         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
319         mmSQ_CONFIG, 0x07f80000, 0x01180000,
320         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
321         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
322         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
323         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
324         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
325         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
326 };
327
328 static const u32 vegam_golden_common_all[] =
329 {
330         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
331         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
332         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
333         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
334         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
335         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
336 };
337
338 static const u32 golden_settings_polaris11_a11[] =
339 {
340         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
341         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
342         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
343         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
344         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
345         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
346         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
347         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
348         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
349         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
350         mmSQ_CONFIG, 0x07f80000, 0x01180000,
351         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
352         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
353         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
354         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
355         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
356         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
357 };
358
359 static const u32 polaris11_golden_common_all[] =
360 {
361         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
362         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
363         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
364         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
365         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
366         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
367 };
368
369 static const u32 golden_settings_polaris10_a11[] =
370 {
371         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
372         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
373         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
374         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
375         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
376         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
377         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
378         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
379         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
380         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
381         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
382         mmSQ_CONFIG, 0x07f80000, 0x07180000,
383         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
384         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
385         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
386         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
387         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
388 };
389
390 static const u32 polaris10_golden_common_all[] =
391 {
392         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
393         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
394         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
395         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
396         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
397         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
398         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
399         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
400 };
401
402 static const u32 fiji_golden_common_all[] =
403 {
404         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
405         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
406         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
407         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
408         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
409         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
410         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
411         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
412         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
413         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
414 };
415
416 static const u32 golden_settings_fiji_a10[] =
417 {
418         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
419         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
420         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
421         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
422         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
423         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
424         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
425         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
426         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
428         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
429 };
430
431 static const u32 fiji_mgcg_cgcg_init[] =
432 {
433         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
434         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
435         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
436         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
440         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
442         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
444         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
455         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
456         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
458         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
459         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
460         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
461         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
462         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
463         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
464         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
465         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
466         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
467         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
468 };
469
470 static const u32 golden_settings_iceland_a11[] =
471 {
472         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
473         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
474         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
475         mmGB_GPU_ID, 0x0000000f, 0x00000000,
476         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
477         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
478         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
479         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
480         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
481         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
482         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
483         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
484         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
485         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
486         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
487         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
488 };
489
490 static const u32 iceland_golden_common_all[] =
491 {
492         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
493         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
494         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
495         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
496         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
497         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
498         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
499         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
500 };
501
502 static const u32 iceland_mgcg_cgcg_init[] =
503 {
504         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
505         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
506         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
507         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
508         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
509         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
510         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
511         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
512         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
513         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
514         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
515         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
516         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
519         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
520         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
521         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
523         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
524         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
525         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
526         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
527         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
528         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
529         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
530         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
531         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
532         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
533         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
534         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
535         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
536         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
537         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
538         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
539         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
540         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
541         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
542         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
543         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
544         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
545         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
546         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
547         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
548         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
549         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
550         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
551         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
552         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
553         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
554         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
555         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
558         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
561         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
562         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
563         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
564         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
565         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
566         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
567         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
568 };
569
570 static const u32 cz_golden_settings_a11[] =
571 {
572         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
573         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
574         mmGB_GPU_ID, 0x0000000f, 0x00000000,
575         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
576         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
577         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
578         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
579         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
580         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
581         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
582         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
583         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
584 };
585
586 static const u32 cz_golden_common_all[] =
587 {
588         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
589         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
590         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
591         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
592         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
593         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
594         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
595         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
596 };
597
598 static const u32 cz_mgcg_cgcg_init[] =
599 {
600         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
601         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
602         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
606         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
609         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
611         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
615         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
616         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
617         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
619         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
620         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
621         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
622         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
623         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
624         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
625         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
626         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
627         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
628         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
630         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
631         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
632         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
633         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
634         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
635         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
636         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
637         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
638         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
639         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
640         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
641         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
642         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
643         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
644         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
645         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
646         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
647         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
648         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
649         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
650         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
651         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
652         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
653         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
654         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
655         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
656         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
657         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
658         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
659         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
660         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
661         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
662         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
663         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
664         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
665         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
666         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
667         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
668         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
669         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
670         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
671         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
672         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
673         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
674         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
675 };
676
/*
 * Stoney (rev a11) golden register settings.  Triples of
 * {register offset, mask, value}, consumed by
 * amdgpu_device_program_register_sequence() — see
 * gfx_v8_0_init_golden_registers() below for the ASIC dispatch.
 */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
690
/*
 * Stoney common golden registers (raster config, address config, SPI
 * resource reserves).  Triples of {register offset, mask, value} for
 * amdgpu_device_program_register_sequence().
 */
static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
702
/*
 * Stoney medium-grain / coarse-grain clock gating (MGCG/CGCG) init
 * values.  Triples of {register offset, mask, value} for
 * amdgpu_device_program_register_sequence().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
711
712
/*
 * Human-readable descriptions of the SQ EDC error sources.  Entry order
 * follows the SQ_EDC_INFO_SOURCE_* codes named in each string;
 * presumably indexed by the SQ_EDC_INFO source field when reporting EDC
 * errors — the consumer is outside this chunk, confirm at the use site.
 */
static const char * const sq_edc_source_names[] = {
        "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
        "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
        "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
        "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
        "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
        "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
        "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
722
723 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
724 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
725 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
726 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
727 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
728 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
729 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
730 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
731
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 * @adev: amdgpu device pointer
 *
 * Applies the ASIC-specific clock-gating init, golden-setting and common
 * register tables via amdgpu_device_program_register_sequence().
 * Polaris10 additionally writes CG_ACLK_CNTL through the SMC and, for
 * three specific board SKUs, issues I2C transactions (NOTE(review):
 * presumably a board-level voltage/clock tweak — confirm against the
 * board vbios; the magic bytes are not decodable from this file).
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_mgcg_cgcg_init,
                                                        ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_iceland_a11,
                                                        ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_golden_common_all,
                                                        ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_mgcg_cgcg_init,
                                                        ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_fiji_a10,
                                                        ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_golden_common_all,
                                                        ARRAY_SIZE(fiji_golden_common_all));
                break;

        case CHIP_TONGA:
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_mgcg_cgcg_init,
                                                        ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_tonga_a11,
                                                        ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_golden_common_all,
                                                        ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_VEGAM:
                /* VegaM has no separate MGCG/CGCG init table here */
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_vegam_a11,
                                                        ARRAY_SIZE(golden_settings_vegam_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        vegam_golden_common_all,
                                                        ARRAY_SIZE(vegam_golden_common_all));
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris11_a11,
                                                        ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris11_golden_common_all,
                                                        ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris10_a11,
                                                        ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris10_golden_common_all,
                                                        ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
                /* Board-specific quirk: match on PCI revision + subsystem IDs */
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_device_program_register_sequence(adev,
                                                        cz_mgcg_cgcg_init,
                                                        ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_settings_a11,
                                                        ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_common_all,
                                                        ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_mgcg_cgcg_init,
                                                        ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_settings_a11,
                                                        ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_common_all,
                                                        ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}
828
/*
 * gfx_v8_0_scratch_init - set up the GFX scratch register pool
 * @adev: amdgpu device pointer
 *
 * Exposes 8 scratch registers starting at mmSCRATCH_REG0 and marks all
 * of them as free in the allocation bitmask.
 */
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
835
836 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
837 {
838         struct amdgpu_device *adev = ring->adev;
839         uint32_t scratch;
840         uint32_t tmp = 0;
841         unsigned i;
842         int r;
843
844         r = amdgpu_gfx_scratch_get(adev, &scratch);
845         if (r)
846                 return r;
847
848         WREG32(scratch, 0xCAFEDEAD);
849         r = amdgpu_ring_alloc(ring, 3);
850         if (r)
851                 goto error_free_scratch;
852
853         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
854         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
855         amdgpu_ring_write(ring, 0xDEADBEEF);
856         amdgpu_ring_commit(ring);
857
858         for (i = 0; i < adev->usec_timeout; i++) {
859                 tmp = RREG32(scratch);
860                 if (tmp == 0xDEADBEEF)
861                         break;
862                 udelay(1);
863         }
864
865         if (i >= adev->usec_timeout)
866                 r = -ETIMEDOUT;
867
868 error_free_scratch:
869         amdgpu_gfx_scratch_free(adev, scratch);
870         return r;
871 }
872
/*
 * gfx_v8_0_ring_test_ib - indirect buffer (IB) submission test
 * @ring: ring to submit on
 * @timeout: fence wait timeout in jiffies
 *
 * Allocates a writeback slot seeded with 0xCAFEDEAD, submits a small IB
 * containing a WRITE_DATA packet that stores 0xDEADBEEF to that slot,
 * and waits for the fence.  Returns 0 when the value landed, -ETIMEDOUT
 * on fence timeout, -EINVAL when the fence signalled but the write did
 * not arrive, or a negative error from the allocation/submit helpers.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;

        unsigned int index;
        uint64_t gpu_addr;
        uint32_t tmp;
        long r;

        r = amdgpu_device_wb_get(adev, &index);
        if (r)
                return r;

        /* Writeback slots are 32-bit; seed with a sentinel value. */
        gpu_addr = adev->wb.gpu_addr + (index * 4);
        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 16,
                                        AMDGPU_IB_POOL_DIRECT, &ib);
        if (r)
                goto err1;

        /* WRITE_DATA packet: 0xDEADBEEF -> gpu_addr, with write confirm. */
        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
        ib.ptr[2] = lower_32_bits(gpu_addr);
        ib.ptr[3] = upper_32_bits(gpu_addr);
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        /* dma_fence_wait_timeout: 0 = timed out, <0 = error, >0 = signalled */
        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                goto err2;
        }

        tmp = adev->wb.wb[index];
        if (tmp == 0xDEADBEEF)
                r = 0;
        else
                r = -EINVAL;

err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_device_wb_free(adev, index);
        return r;
}
928
929
930 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
931 {
932         release_firmware(adev->gfx.pfp_fw);
933         adev->gfx.pfp_fw = NULL;
934         release_firmware(adev->gfx.me_fw);
935         adev->gfx.me_fw = NULL;
936         release_firmware(adev->gfx.ce_fw);
937         adev->gfx.ce_fw = NULL;
938         release_firmware(adev->gfx.rlc_fw);
939         adev->gfx.rlc_fw = NULL;
940         release_firmware(adev->gfx.mec_fw);
941         adev->gfx.mec_fw = NULL;
942         if ((adev->asic_type != CHIP_STONEY) &&
943             (adev->asic_type != CHIP_TOPAZ))
944                 release_firmware(adev->gfx.mec2_fw);
945         adev->gfx.mec2_fw = NULL;
946
947         kfree(adev->gfx.rlc.register_list_format);
948 }
949
950 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
951 {
952         const char *chip_name;
953         char fw_name[30];
954         int err;
955         struct amdgpu_firmware_info *info = NULL;
956         const struct common_firmware_header *header = NULL;
957         const struct gfx_firmware_header_v1_0 *cp_hdr;
958         const struct rlc_firmware_header_v2_0 *rlc_hdr;
959         unsigned int *tmp = NULL, i;
960
961         DRM_DEBUG("\n");
962
963         switch (adev->asic_type) {
964         case CHIP_TOPAZ:
965                 chip_name = "topaz";
966                 break;
967         case CHIP_TONGA:
968                 chip_name = "tonga";
969                 break;
970         case CHIP_CARRIZO:
971                 chip_name = "carrizo";
972                 break;
973         case CHIP_FIJI:
974                 chip_name = "fiji";
975                 break;
976         case CHIP_STONEY:
977                 chip_name = "stoney";
978                 break;
979         case CHIP_POLARIS10:
980                 chip_name = "polaris10";
981                 break;
982         case CHIP_POLARIS11:
983                 chip_name = "polaris11";
984                 break;
985         case CHIP_POLARIS12:
986                 chip_name = "polaris12";
987                 break;
988         case CHIP_VEGAM:
989                 chip_name = "vegam";
990                 break;
991         default:
992                 BUG();
993         }
994
995         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
996                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
997                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
998                 if (err == -ENOENT) {
999                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1000                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1001                 }
1002         } else {
1003                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1004                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1005         }
1006         if (err)
1007                 goto out;
1008         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1009         if (err)
1010                 goto out;
1011         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1012         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1013         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1014
1015         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1016                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1017                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1018                 if (err == -ENOENT) {
1019                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1020                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1021                 }
1022         } else {
1023                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1024                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1025         }
1026         if (err)
1027                 goto out;
1028         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1029         if (err)
1030                 goto out;
1031         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1032         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1033
1034         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1035
1036         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1037                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1038                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1039                 if (err == -ENOENT) {
1040                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1041                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1042                 }
1043         } else {
1044                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1045                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1046         }
1047         if (err)
1048                 goto out;
1049         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1050         if (err)
1051                 goto out;
1052         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1053         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1054         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1055
1056         /*
1057          * Support for MCBP/Virtualization in combination with chained IBs is
1058          * formal released on feature version #46
1059          */
1060         if (adev->gfx.ce_feature_version >= 46 &&
1061             adev->gfx.pfp_feature_version >= 46) {
1062                 adev->virt.chained_ib_support = true;
1063                 DRM_INFO("Chained IB support enabled!\n");
1064         } else
1065                 adev->virt.chained_ib_support = false;
1066
1067         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1068         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1069         if (err)
1070                 goto out;
1071         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1072         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1073         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1074         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1075
1076         adev->gfx.rlc.save_and_restore_offset =
1077                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1078         adev->gfx.rlc.clear_state_descriptor_offset =
1079                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1080         adev->gfx.rlc.avail_scratch_ram_locations =
1081                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1082         adev->gfx.rlc.reg_restore_list_size =
1083                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1084         adev->gfx.rlc.reg_list_format_start =
1085                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1086         adev->gfx.rlc.reg_list_format_separate_start =
1087                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1088         adev->gfx.rlc.starting_offsets_start =
1089                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1090         adev->gfx.rlc.reg_list_format_size_bytes =
1091                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1092         adev->gfx.rlc.reg_list_size_bytes =
1093                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1094
1095         adev->gfx.rlc.register_list_format =
1096                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1097                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1098
1099         if (!adev->gfx.rlc.register_list_format) {
1100                 err = -ENOMEM;
1101                 goto out;
1102         }
1103
1104         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1105                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1106         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1107                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1108
1109         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1110
1111         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1112                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1113         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1114                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1115
1116         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1117                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1118                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1119                 if (err == -ENOENT) {
1120                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1121                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1122                 }
1123         } else {
1124                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1125                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1126         }
1127         if (err)
1128                 goto out;
1129         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1130         if (err)
1131                 goto out;
1132         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1133         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1134         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1135
1136         if ((adev->asic_type != CHIP_STONEY) &&
1137             (adev->asic_type != CHIP_TOPAZ)) {
1138                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1139                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1140                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1141                         if (err == -ENOENT) {
1142                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1143                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1144                         }
1145                 } else {
1146                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1147                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1148                 }
1149                 if (!err) {
1150                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1151                         if (err)
1152                                 goto out;
1153                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1154                                 adev->gfx.mec2_fw->data;
1155                         adev->gfx.mec2_fw_version =
1156                                 le32_to_cpu(cp_hdr->header.ucode_version);
1157                         adev->gfx.mec2_feature_version =
1158                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1159                 } else {
1160                         err = 0;
1161                         adev->gfx.mec2_fw = NULL;
1162                 }
1163         }
1164
1165         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1166         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1167         info->fw = adev->gfx.pfp_fw;
1168         header = (const struct common_firmware_header *)info->fw->data;
1169         adev->firmware.fw_size +=
1170                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1171
1172         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1173         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1174         info->fw = adev->gfx.me_fw;
1175         header = (const struct common_firmware_header *)info->fw->data;
1176         adev->firmware.fw_size +=
1177                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1178
1179         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1180         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1181         info->fw = adev->gfx.ce_fw;
1182         header = (const struct common_firmware_header *)info->fw->data;
1183         adev->firmware.fw_size +=
1184                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1185
1186         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1187         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1188         info->fw = adev->gfx.rlc_fw;
1189         header = (const struct common_firmware_header *)info->fw->data;
1190         adev->firmware.fw_size +=
1191                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1192
1193         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1194         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1195         info->fw = adev->gfx.mec_fw;
1196         header = (const struct common_firmware_header *)info->fw->data;
1197         adev->firmware.fw_size +=
1198                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1199
1200         /* we need account JT in */
1201         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1202         adev->firmware.fw_size +=
1203                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1204
1205         if (amdgpu_sriov_vf(adev)) {
1206                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1207                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1208                 info->fw = adev->gfx.mec_fw;
1209                 adev->firmware.fw_size +=
1210                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1211         }
1212
1213         if (adev->gfx.mec2_fw) {
1214                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1215                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1216                 info->fw = adev->gfx.mec2_fw;
1217                 header = (const struct common_firmware_header *)info->fw->data;
1218                 adev->firmware.fw_size +=
1219                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1220         }
1221
1222 out:
1223         if (err) {
1224                 dev_err(adev->dev,
1225                         "gfx8: Failed to load firmware \"%s\"\n",
1226                         fw_name);
1227                 release_firmware(adev->gfx.pfp_fw);
1228                 adev->gfx.pfp_fw = NULL;
1229                 release_firmware(adev->gfx.me_fw);
1230                 adev->gfx.me_fw = NULL;
1231                 release_firmware(adev->gfx.ce_fw);
1232                 adev->gfx.ce_fw = NULL;
1233                 release_firmware(adev->gfx.rlc_fw);
1234                 adev->gfx.rlc_fw = NULL;
1235                 release_firmware(adev->gfx.mec_fw);
1236                 adev->gfx.mec_fw = NULL;
1237                 release_firmware(adev->gfx.mec2_fw);
1238                 adev->gfx.mec2_fw = NULL;
1239         }
1240         return err;
1241 }
1242
/*
 * gfx_v8_0_get_csb_buffer - build the clear-state buffer (CSB) contents
 * @adev: amdgpu device pointer
 * @buffer: destination (GPU-visible, written little-endian)
 *
 * Emits the PM4 stream that loads the golden context-register state:
 * PREAMBLE begin, CONTEXT_CONTROL, every SECT_CONTEXT extent from the
 * rlc cs_data table, the per-board raster config, PREAMBLE end, and a
 * final CLEAR_STATE packet.  @buffer must be large enough — presumably
 * sized via gfx_v8_0_get_csb_size(); confirm at the call site.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
                                    volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        /* One SET_CONTEXT_REG packet per extent; non-context sections abort. */
        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                return;
                        }
                }
        }

        /* Program the board's raster config pair from the cached rb config. */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
                        PACKET3_SET_CONTEXT_REG_START);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}
1289
1290 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1291 {
1292         if (adev->asic_type == CHIP_CARRIZO)
1293                 return 5;
1294         else
1295                 return 4;
1296 }
1297
1298 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1299 {
1300         const struct cs_section_def *cs_data;
1301         int r;
1302
1303         adev->gfx.rlc.cs_data = vi_cs_data;
1304
1305         cs_data = adev->gfx.rlc.cs_data;
1306
1307         if (cs_data) {
1308                 /* init clear state block */
1309                 r = amdgpu_gfx_rlc_init_csb(adev);
1310                 if (r)
1311                         return r;
1312         }
1313
1314         if ((adev->asic_type == CHIP_CARRIZO) ||
1315             (adev->asic_type == CHIP_STONEY)) {
1316                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1317                 r = amdgpu_gfx_rlc_init_cpt(adev);
1318                 if (r)
1319                         return r;
1320         }
1321
1322         /* init spm vmid with 0xf */
1323         if (adev->gfx.rlc.funcs->update_spm_vmid)
1324                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1325
1326         return 0;
1327 }
1328
/* Free the MEC HPD EOP buffer object allocated by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1333
1334 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1335 {
1336         int r;
1337         u32 *hpd;
1338         size_t mec_hpd_size;
1339
1340         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1341
1342         /* take ownership of the relevant compute queues */
1343         amdgpu_gfx_compute_queue_acquire(adev);
1344
1345         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1346
1347         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1348                                       AMDGPU_GEM_DOMAIN_VRAM,
1349                                       &adev->gfx.mec.hpd_eop_obj,
1350                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1351                                       (void **)&hpd);
1352         if (r) {
1353                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1354                 return r;
1355         }
1356
1357         memset(hpd, 0, mec_hpd_size);
1358
1359         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1360         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1361
1362         return 0;
1363 }
1364
/*
 * Raw GCN machine code for the VGPR-init workaround shader, copied
 * verbatim into the indirect buffer by gfx_v8_0_do_edc_gpr_workarounds().
 * Presumably a sequence of VGPR writes followed by a barrier/end pair —
 * treat as opaque binary; do not edit by hand.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1401
/*
 * Raw GCN machine code for the SGPR-init workaround shader; loaded once
 * and dispatched twice (SGPR1/SGPR2 passes with different CU masks) by
 * gfx_v8_0_do_edc_gpr_workarounds().  Treat as opaque binary.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1426
/*
 * Register/value pairs programmed via SET_SH_REG before dispatching the
 * VGPR-init shader (consumed pairwise by gfx_v8_0_do_edc_gpr_workarounds()).
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1447
/*
 * Register/value pairs for the first SGPR-init dispatch; differs from
 * sgpr2_init_regs only in the static thread management mask (0x0f).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1468
/*
 * Register/value pairs for the second SGPR-init dispatch; identical to
 * sgpr1_init_regs except for the static thread management mask (0xf0).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1, same as sgpr1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1489
/*
 * EDC SEC/DED error counter registers, read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1518
/**
 * gfx_v8_0_do_edc_gpr_workarounds - run GPR-init shaders for EDC (Carrizo)
 * @adev: amdgpu device pointer
 *
 * Builds and submits a single indirect buffer that dispatches the VGPR-init
 * shader once and the SGPR-init shader twice (with complementary CU masks),
 * so that every GPR is written before EDC error detection is enabled.
 * After the IB completes, re-enables EDC (DED_MODE/PROP_FED) and reads the
 * SEC/DED counter registers to clear them.  No-op on non-Carrizo parts or
 * when the first compute ring is not ready.  Returns 0 on success or a
 * negative error code from IB allocation/submission/fence wait.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->sched.ready)
		return 0;

	/* disable EDC while the init shaders run; restored below */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per pass: 3 dwords per reg pair + 4 (PGM addr) + 5 (dispatch) + 2 (flush) */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size,
					AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	/* intentionally sgpr_offset again: both SGPR passes run the same shader */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with double-error detection and fault propagation */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	/* NOTE(review): the trailing "| 1" looks odd after clearing DIS_EDC —
	 * presumably it sets bit 0 on purpose; confirm against register spec */
	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1682
/**
 * gfx_v8_0_gpu_early_init - set per-ASIC gfx configuration limits
 * @adev: amdgpu device pointer
 *
 * Fills adev->gfx.config with shader-engine/pipe/CU limits and scan
 * converter FIFO sizes for the detected ASIC (Polaris parts query the
 * vbios instead of using hard-coded tables), derives memory bank/rank
 * counts and row size from the memory controller registers, and stores
 * the final GB_ADDR_CONFIG value.  Returns 0 on success or a negative
 * error code if the vbios gfx-info query fails.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* Polaris reads the SE/pipe/CU limits from the vbios */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
	case CHIP_VEGAM:
		/* Polaris reads the SE/pipe/CU limits from the vbios */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFBANK);
	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFRANKS);

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: derive row size from the column count, capped at 4KB */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1890
1891 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1892                                         int mec, int pipe, int queue)
1893 {
1894         int r;
1895         unsigned irq_type;
1896         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1897         unsigned int hw_prio;
1898
1899         ring = &adev->gfx.compute_ring[ring_id];
1900
1901         /* mec0 is me1 */
1902         ring->me = mec + 1;
1903         ring->pipe = pipe;
1904         ring->queue = queue;
1905
1906         ring->ring_obj = NULL;
1907         ring->use_doorbell = true;
1908         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1909         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1910                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1911         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1912
1913         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1914                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1915                 + ring->pipe;
1916
1917         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ?
1918                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
1919         /* type-2 packets are deprecated on MEC, use type-3 instead */
1920         r = amdgpu_ring_init(adev, ring, 1024,
1921                              &adev->gfx.eop_irq, irq_type, hw_prio);
1922         if (r)
1923                 return r;
1924
1925
1926         return 0;
1927 }
1928
1929 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1930
1931 static int gfx_v8_0_sw_init(void *handle)
1932 {
1933         int i, j, k, r, ring_id;
1934         struct amdgpu_ring *ring;
1935         struct amdgpu_kiq *kiq;
1936         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1937
1938         switch (adev->asic_type) {
1939         case CHIP_TONGA:
1940         case CHIP_CARRIZO:
1941         case CHIP_FIJI:
1942         case CHIP_POLARIS10:
1943         case CHIP_POLARIS11:
1944         case CHIP_POLARIS12:
1945         case CHIP_VEGAM:
1946                 adev->gfx.mec.num_mec = 2;
1947                 break;
1948         case CHIP_TOPAZ:
1949         case CHIP_STONEY:
1950         default:
1951                 adev->gfx.mec.num_mec = 1;
1952                 break;
1953         }
1954
1955         adev->gfx.mec.num_pipe_per_mec = 4;
1956         adev->gfx.mec.num_queue_per_pipe = 8;
1957
1958         /* EOP Event */
1959         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1960         if (r)
1961                 return r;
1962
1963         /* Privileged reg */
1964         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1965                               &adev->gfx.priv_reg_irq);
1966         if (r)
1967                 return r;
1968
1969         /* Privileged inst */
1970         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1971                               &adev->gfx.priv_inst_irq);
1972         if (r)
1973                 return r;
1974
1975         /* Add CP EDC/ECC irq  */
1976         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1977                               &adev->gfx.cp_ecc_error_irq);
1978         if (r)
1979                 return r;
1980
1981         /* SQ interrupts. */
1982         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1983                               &adev->gfx.sq_irq);
1984         if (r) {
1985                 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1986                 return r;
1987         }
1988
1989         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1990
1991         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1992
1993         gfx_v8_0_scratch_init(adev);
1994
1995         r = gfx_v8_0_init_microcode(adev);
1996         if (r) {
1997                 DRM_ERROR("Failed to load gfx firmware!\n");
1998                 return r;
1999         }
2000
2001         r = adev->gfx.rlc.funcs->init(adev);
2002         if (r) {
2003                 DRM_ERROR("Failed to init rlc BOs!\n");
2004                 return r;
2005         }
2006
2007         r = gfx_v8_0_mec_init(adev);
2008         if (r) {
2009                 DRM_ERROR("Failed to init MEC BOs!\n");
2010                 return r;
2011         }
2012
2013         /* set up the gfx ring */
2014         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2015                 ring = &adev->gfx.gfx_ring[i];
2016                 ring->ring_obj = NULL;
2017                 sprintf(ring->name, "gfx");
2018                 /* no gfx doorbells on iceland */
2019                 if (adev->asic_type != CHIP_TOPAZ) {
2020                         ring->use_doorbell = true;
2021                         ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2022                 }
2023
2024                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2025                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2026                                      AMDGPU_RING_PRIO_DEFAULT);
2027                 if (r)
2028                         return r;
2029         }
2030
2031
2032         /* set up the compute queues - allocate horizontally across pipes */
2033         ring_id = 0;
2034         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2035                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2036                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2037                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2038                                         continue;
2039
2040                                 r = gfx_v8_0_compute_ring_init(adev,
2041                                                                 ring_id,
2042                                                                 i, k, j);
2043                                 if (r)
2044                                         return r;
2045
2046                                 ring_id++;
2047                         }
2048                 }
2049         }
2050
2051         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2052         if (r) {
2053                 DRM_ERROR("Failed to init KIQ BOs!\n");
2054                 return r;
2055         }
2056
2057         kiq = &adev->gfx.kiq;
2058         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2059         if (r)
2060                 return r;
2061
2062         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2063         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2064         if (r)
2065                 return r;
2066
2067         adev->gfx.ce_ram_size = 0x8000;
2068
2069         r = gfx_v8_0_gpu_early_init(adev);
2070         if (r)
2071                 return r;
2072
2073         return 0;
2074 }
2075
2076 static int gfx_v8_0_sw_fini(void *handle)
2077 {
2078         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2079         int i;
2080
2081         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2082                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2083         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2084                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2085
2086         amdgpu_gfx_mqd_sw_fini(adev);
2087         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2088         amdgpu_gfx_kiq_fini(adev);
2089
2090         gfx_v8_0_mec_fini(adev);
2091         amdgpu_gfx_rlc_fini(adev);
2092         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2093                                 &adev->gfx.rlc.clear_state_gpu_addr,
2094                                 (void **)&adev->gfx.rlc.cs_ptr);
2095         if ((adev->asic_type == CHIP_CARRIZO) ||
2096             (adev->asic_type == CHIP_STONEY)) {
2097                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2098                                 &adev->gfx.rlc.cp_table_gpu_addr,
2099                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2100         }
2101         gfx_v8_0_free_microcode(adev);
2102
2103         return 0;
2104 }
2105
2106 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2107 {
2108         uint32_t *modearray, *mod2array;
2109         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2110         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2111         u32 reg_offset;
2112
2113         modearray = adev->gfx.config.tile_mode_array;
2114         mod2array = adev->gfx.config.macrotile_mode_array;
2115
2116         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2117                 modearray[reg_offset] = 0;
2118
2119         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2120                 mod2array[reg_offset] = 0;
2121
2122         switch (adev->asic_type) {
2123         case CHIP_TOPAZ:
2124                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2125                                 PIPE_CONFIG(ADDR_SURF_P2) |
2126                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2127                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2128                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2129                                 PIPE_CONFIG(ADDR_SURF_P2) |
2130                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2131                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2132                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2133                                 PIPE_CONFIG(ADDR_SURF_P2) |
2134                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2135                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2136                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2137                                 PIPE_CONFIG(ADDR_SURF_P2) |
2138                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2139                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2140                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2141                                 PIPE_CONFIG(ADDR_SURF_P2) |
2142                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2143                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2144                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2145                                 PIPE_CONFIG(ADDR_SURF_P2) |
2146                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2147                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2148                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2149                                 PIPE_CONFIG(ADDR_SURF_P2) |
2150                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2151                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2152                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2153                                 PIPE_CONFIG(ADDR_SURF_P2));
2154                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2155                                 PIPE_CONFIG(ADDR_SURF_P2) |
2156                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2157                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2158                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2159                                  PIPE_CONFIG(ADDR_SURF_P2) |
2160                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2161                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2162                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2163                                  PIPE_CONFIG(ADDR_SURF_P2) |
2164                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2165                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2166                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2167                                  PIPE_CONFIG(ADDR_SURF_P2) |
2168                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2169                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2170                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2171                                  PIPE_CONFIG(ADDR_SURF_P2) |
2172                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2173                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2174                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2175                                  PIPE_CONFIG(ADDR_SURF_P2) |
2176                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2177                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2178                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2179                                  PIPE_CONFIG(ADDR_SURF_P2) |
2180                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2181                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2182                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2183                                  PIPE_CONFIG(ADDR_SURF_P2) |
2184                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2185                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2186                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2187                                  PIPE_CONFIG(ADDR_SURF_P2) |
2188                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2189                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2190                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2191                                  PIPE_CONFIG(ADDR_SURF_P2) |
2192                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2193                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2194                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2195                                  PIPE_CONFIG(ADDR_SURF_P2) |
2196                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2197                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2198                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2199                                  PIPE_CONFIG(ADDR_SURF_P2) |
2200                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2201                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2202                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2203                                  PIPE_CONFIG(ADDR_SURF_P2) |
2204                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2205                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2206                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2207                                  PIPE_CONFIG(ADDR_SURF_P2) |
2208                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2209                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2210                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2211                                  PIPE_CONFIG(ADDR_SURF_P2) |
2212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2215                                  PIPE_CONFIG(ADDR_SURF_P2) |
2216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2218                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219                                  PIPE_CONFIG(ADDR_SURF_P2) |
2220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2222                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2223                                  PIPE_CONFIG(ADDR_SURF_P2) |
2224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2226
2227                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2228                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2229                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2230                                 NUM_BANKS(ADDR_SURF_8_BANK));
2231                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2232                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2233                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2234                                 NUM_BANKS(ADDR_SURF_8_BANK));
2235                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2236                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2237                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2238                                 NUM_BANKS(ADDR_SURF_8_BANK));
2239                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2240                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2241                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2242                                 NUM_BANKS(ADDR_SURF_8_BANK));
2243                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2245                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2246                                 NUM_BANKS(ADDR_SURF_8_BANK));
2247                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2248                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2249                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2250                                 NUM_BANKS(ADDR_SURF_8_BANK));
2251                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2252                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2253                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2254                                 NUM_BANKS(ADDR_SURF_8_BANK));
2255                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2256                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2257                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2258                                 NUM_BANKS(ADDR_SURF_16_BANK));
2259                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2260                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2262                                 NUM_BANKS(ADDR_SURF_16_BANK));
2263                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2264                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2265                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2266                                  NUM_BANKS(ADDR_SURF_16_BANK));
2267                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2268                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2269                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2270                                  NUM_BANKS(ADDR_SURF_16_BANK));
2271                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2273                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2274                                  NUM_BANKS(ADDR_SURF_16_BANK));
2275                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2277                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2278                                  NUM_BANKS(ADDR_SURF_16_BANK));
2279                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2282                                  NUM_BANKS(ADDR_SURF_8_BANK));
2283
2284                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2285                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2286                             reg_offset != 23)
2287                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2288
2289                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2290                         if (reg_offset != 7)
2291                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2292
2293                 break;
2294         case CHIP_FIJI:
2295         case CHIP_VEGAM:
2296                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2297                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2298                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2299                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2300                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2301                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2302                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2303                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2304                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2305                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2306                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2307                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2308                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2309                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2310                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2311                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2312                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2313                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2314                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2315                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2316                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2317                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2319                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2320                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2321                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2322                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2323                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2324                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2325                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2326                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2327                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2328                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2329                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2330                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2331                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2333                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2334                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2337                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2338                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2339                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2341                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2342                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2343                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2344                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2345                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2346                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2347                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2349                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2350                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2351                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2353                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2354                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2355                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2356                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2357                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2358                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2359                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2361                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2362                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2363                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2364                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2365                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2366                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2367                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2369                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2370                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2371                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2373                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2374                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2375                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2377                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2378                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2379                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2381                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2382                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2383                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2385                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2386                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2387                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2388                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2389                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2390                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2391                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2393                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2394                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2395                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2397                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2398                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2399                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2401                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2402                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2403                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2405                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2409                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2413                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2414                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2415                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2416                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2417                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2418
2419                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2420                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2421                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2422                                 NUM_BANKS(ADDR_SURF_8_BANK));
2423                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2424                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2425                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2426                                 NUM_BANKS(ADDR_SURF_8_BANK));
2427                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2429                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2430                                 NUM_BANKS(ADDR_SURF_8_BANK));
2431                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2433                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2434                                 NUM_BANKS(ADDR_SURF_8_BANK));
2435                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2436                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2437                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2438                                 NUM_BANKS(ADDR_SURF_8_BANK));
2439                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2440                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2441                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2442                                 NUM_BANKS(ADDR_SURF_8_BANK));
2443                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2444                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2445                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2446                                 NUM_BANKS(ADDR_SURF_8_BANK));
2447                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2449                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2450                                 NUM_BANKS(ADDR_SURF_8_BANK));
2451                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2453                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2454                                 NUM_BANKS(ADDR_SURF_8_BANK));
2455                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2457                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2458                                  NUM_BANKS(ADDR_SURF_8_BANK));
2459                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2460                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2461                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2462                                  NUM_BANKS(ADDR_SURF_8_BANK));
2463                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2465                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2466                                  NUM_BANKS(ADDR_SURF_8_BANK));
2467                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2470                                  NUM_BANKS(ADDR_SURF_8_BANK));
2471                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2473                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2474                                  NUM_BANKS(ADDR_SURF_4_BANK));
2475
2476                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2477                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2478
2479                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2480                         if (reg_offset != 7)
2481                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2482
2483                 break;
2484         case CHIP_TONGA:
2485                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2486                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2487                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2488                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2489                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2490                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2491                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2492                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2493                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2495                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2496                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2497                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2498                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2499                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2500                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2501                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2502                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2503                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2504                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2505                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2506                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2508                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2509                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2510                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2512                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2513                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2514                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2515                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2516                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2517                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2518                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2519                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2520                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2522                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2523                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2525                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2526                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2527                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2528                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2530                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2531                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2532                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2533                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2534                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2535                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2536                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2537                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2538                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2539                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2540                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2541                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2542                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2543                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2544                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2545                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2546                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2547                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2548                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2549                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2550                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2551                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2552                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2553                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2554                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2555                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2556                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2557                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2558                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2559                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2560                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2561                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2562                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2563                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2564                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2566                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2567                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2568                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2570                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2571                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2572                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2574                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2575                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2576                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2577                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2578                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2579                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2580                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2582                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2583                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2584                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2585                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2586                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2587                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2588                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2590                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2591                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2592                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2594                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2595                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2598                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2599                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2600                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2602                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2603                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2604                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2606                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2607
2608                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2610                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2611                                 NUM_BANKS(ADDR_SURF_16_BANK));
2612                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2614                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2615                                 NUM_BANKS(ADDR_SURF_16_BANK));
2616                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2618                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2619                                 NUM_BANKS(ADDR_SURF_16_BANK));
2620                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2622                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623                                 NUM_BANKS(ADDR_SURF_16_BANK));
2624                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2626                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2627                                 NUM_BANKS(ADDR_SURF_16_BANK));
2628                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2631                                 NUM_BANKS(ADDR_SURF_16_BANK));
2632                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2635                                 NUM_BANKS(ADDR_SURF_16_BANK));
2636                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2638                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2639                                 NUM_BANKS(ADDR_SURF_16_BANK));
2640                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2642                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2643                                 NUM_BANKS(ADDR_SURF_16_BANK));
2644                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2646                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2647                                  NUM_BANKS(ADDR_SURF_16_BANK));
2648                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2650                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2651                                  NUM_BANKS(ADDR_SURF_16_BANK));
2652                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2653                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2654                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2655                                  NUM_BANKS(ADDR_SURF_8_BANK));
2656                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2658                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2659                                  NUM_BANKS(ADDR_SURF_4_BANK));
2660                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2663                                  NUM_BANKS(ADDR_SURF_4_BANK));
2664
2665                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2666                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2667
2668                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2669                         if (reg_offset != 7)
2670                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2671
2672                 break;
2673         case CHIP_POLARIS11:
2674         case CHIP_POLARIS12:
2675                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2677                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2678                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2679                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2680                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2681                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2682                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2683                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2684                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2685                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2686                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2687                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2688                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2689                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2690                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2691                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2693                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2694                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2695                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2696                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2698                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2699                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2700                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2702                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2703                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2704                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2706                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2707                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2708                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2709                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2710                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2712                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2713                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2716                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2717                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2718                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2720                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2721                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2722                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2724                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2725                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2726                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2727                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2728                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2729                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2730                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2732                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2733                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2734                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2736                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2737                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2738                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2740                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2741                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2742                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2744                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2745                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2746                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2747                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2748                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2749                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2750                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2751                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2752                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2753                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2754                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2755                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2756                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2757                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2758                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2760                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2761                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2762                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2764                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2765                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2766                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2768                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2769                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2770                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2772                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2773                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2774                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2776                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2777                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2778                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2780                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2781                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2784                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2788                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2789                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2790                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2791                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2792                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2793                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2794                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2796                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2797
2798                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2800                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2801                                 NUM_BANKS(ADDR_SURF_16_BANK));
2802
2803                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2805                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2806                                 NUM_BANKS(ADDR_SURF_16_BANK));
2807
2808                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2810                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811                                 NUM_BANKS(ADDR_SURF_16_BANK));
2812
2813                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2815                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816                                 NUM_BANKS(ADDR_SURF_16_BANK));
2817
2818                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2819                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2820                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2821                                 NUM_BANKS(ADDR_SURF_16_BANK));
2822
2823                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2824                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2825                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2826                                 NUM_BANKS(ADDR_SURF_16_BANK));
2827
2828                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2831                                 NUM_BANKS(ADDR_SURF_16_BANK));
2832
2833                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2834                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2835                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2836                                 NUM_BANKS(ADDR_SURF_16_BANK));
2837
2838                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2839                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2840                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2841                                 NUM_BANKS(ADDR_SURF_16_BANK));
2842
2843                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2845                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846                                 NUM_BANKS(ADDR_SURF_16_BANK));
2847
2848                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2851                                 NUM_BANKS(ADDR_SURF_16_BANK));
2852
2853                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2855                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2856                                 NUM_BANKS(ADDR_SURF_16_BANK));
2857
2858                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2860                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2861                                 NUM_BANKS(ADDR_SURF_8_BANK));
2862
2863                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2864                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2865                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2866                                 NUM_BANKS(ADDR_SURF_4_BANK));
2867
2868                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2869                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2870
2871                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2872                         if (reg_offset != 7)
2873                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2874
2875                 break;
2876         case CHIP_POLARIS10:
2877                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2879                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2880                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2881                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2883                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2884                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2885                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2887                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2888                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2889                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2891                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2892                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2893                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2895                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2896                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2897                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2898                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2899                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2900                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2901                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2902                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2904                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2905                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2906                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2907                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2908                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2909                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2910                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2911                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2912                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2914                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2915                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2918                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2919                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2920                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2922                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2923                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2925                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2926                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2927                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2928                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2929                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2930                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2931                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2932                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2933                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2935                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2936                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2937                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2940                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2941                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2943                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2944                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2945                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2946                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2947                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2948                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2949                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2950                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2951                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2952                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2953                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2954                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2955                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2956                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2958                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2959                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2960                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2962                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2963                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2964                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2966                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2967                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2968                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2969                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2970                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2971                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2972                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2974                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2975                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2976                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2978                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2979                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2980                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2982                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2983                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2984                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2986                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2987                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2988                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2989                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2990                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2991                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2992                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2993                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2994                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2995                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2996                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2997                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2998                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2999
3000                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3002                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003                                 NUM_BANKS(ADDR_SURF_16_BANK));
3004
3005                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3006                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3007                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3008                                 NUM_BANKS(ADDR_SURF_16_BANK));
3009
3010                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3012                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3013                                 NUM_BANKS(ADDR_SURF_16_BANK));
3014
3015                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3017                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018                                 NUM_BANKS(ADDR_SURF_16_BANK));
3019
3020                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3022                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3023                                 NUM_BANKS(ADDR_SURF_16_BANK));
3024
3025                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3027                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3028                                 NUM_BANKS(ADDR_SURF_16_BANK));
3029
3030                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3032                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3033                                 NUM_BANKS(ADDR_SURF_16_BANK));
3034
3035                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3037                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3038                                 NUM_BANKS(ADDR_SURF_16_BANK));
3039
3040                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3042                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3043                                 NUM_BANKS(ADDR_SURF_16_BANK));
3044
3045                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3047                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3048                                 NUM_BANKS(ADDR_SURF_16_BANK));
3049
3050                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3052                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3053                                 NUM_BANKS(ADDR_SURF_16_BANK));
3054
3055                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3057                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3058                                 NUM_BANKS(ADDR_SURF_8_BANK));
3059
3060                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3063                                 NUM_BANKS(ADDR_SURF_4_BANK));
3064
3065                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3067                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3068                                 NUM_BANKS(ADDR_SURF_4_BANK));
3069
3070                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3071                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3072
3073                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3074                         if (reg_offset != 7)
3075                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3076
3077                 break;
3078         case CHIP_STONEY:
3079                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3080                                 PIPE_CONFIG(ADDR_SURF_P2) |
3081                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3082                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3083                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3084                                 PIPE_CONFIG(ADDR_SURF_P2) |
3085                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3086                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3087                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3088                                 PIPE_CONFIG(ADDR_SURF_P2) |
3089                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3090                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3091                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3092                                 PIPE_CONFIG(ADDR_SURF_P2) |
3093                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3094                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3095                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3096                                 PIPE_CONFIG(ADDR_SURF_P2) |
3097                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3098                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3099                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3100                                 PIPE_CONFIG(ADDR_SURF_P2) |
3101                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3102                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3103                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3104                                 PIPE_CONFIG(ADDR_SURF_P2) |
3105                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3106                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3107                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3108                                 PIPE_CONFIG(ADDR_SURF_P2));
3109                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3110                                 PIPE_CONFIG(ADDR_SURF_P2) |
3111                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3112                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3113                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3114                                  PIPE_CONFIG(ADDR_SURF_P2) |
3115                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3116                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3117                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3118                                  PIPE_CONFIG(ADDR_SURF_P2) |
3119                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3120                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3121                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3122                                  PIPE_CONFIG(ADDR_SURF_P2) |
3123                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3124                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3125                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3126                                  PIPE_CONFIG(ADDR_SURF_P2) |
3127                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3128                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3129                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3130                                  PIPE_CONFIG(ADDR_SURF_P2) |
3131                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3132                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3133                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3134                                  PIPE_CONFIG(ADDR_SURF_P2) |
3135                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3136                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3137                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3138                                  PIPE_CONFIG(ADDR_SURF_P2) |
3139                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3140                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3141                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3142                                  PIPE_CONFIG(ADDR_SURF_P2) |
3143                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3144                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3145                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3146                                  PIPE_CONFIG(ADDR_SURF_P2) |
3147                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3148                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3149                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3150                                  PIPE_CONFIG(ADDR_SURF_P2) |
3151                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3152                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3153                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3154                                  PIPE_CONFIG(ADDR_SURF_P2) |
3155                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3156                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3157                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3158                                  PIPE_CONFIG(ADDR_SURF_P2) |
3159                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3160                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3161                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3162                                  PIPE_CONFIG(ADDR_SURF_P2) |
3163                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3164                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3165                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3166                                  PIPE_CONFIG(ADDR_SURF_P2) |
3167                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3168                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3169                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3170                                  PIPE_CONFIG(ADDR_SURF_P2) |
3171                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3172                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3173                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174                                  PIPE_CONFIG(ADDR_SURF_P2) |
3175                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3176                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3177                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3178                                  PIPE_CONFIG(ADDR_SURF_P2) |
3179                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3180                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3181
3182                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3183                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3184                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3185                                 NUM_BANKS(ADDR_SURF_8_BANK));
3186                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3187                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3188                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3189                                 NUM_BANKS(ADDR_SURF_8_BANK));
3190                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3191                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3192                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3193                                 NUM_BANKS(ADDR_SURF_8_BANK));
3194                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3195                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3196                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3197                                 NUM_BANKS(ADDR_SURF_8_BANK));
3198                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3199                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3200                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3201                                 NUM_BANKS(ADDR_SURF_8_BANK));
3202                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3203                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3204                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3205                                 NUM_BANKS(ADDR_SURF_8_BANK));
3206                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3207                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3208                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3209                                 NUM_BANKS(ADDR_SURF_8_BANK));
3210                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3211                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3212                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3213                                 NUM_BANKS(ADDR_SURF_16_BANK));
3214                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3215                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3216                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3217                                 NUM_BANKS(ADDR_SURF_16_BANK));
3218                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3219                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3220                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3221                                  NUM_BANKS(ADDR_SURF_16_BANK));
3222                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3223                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3224                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3225                                  NUM_BANKS(ADDR_SURF_16_BANK));
3226                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3227                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3228                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3229                                  NUM_BANKS(ADDR_SURF_16_BANK));
3230                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3231                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3232                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3233                                  NUM_BANKS(ADDR_SURF_16_BANK));
3234                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3235                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3236                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3237                                  NUM_BANKS(ADDR_SURF_8_BANK));
3238
3239                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3240                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3241                             reg_offset != 23)
3242                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3243
3244                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3245                         if (reg_offset != 7)
3246                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3247
3248                 break;
3249         default:
3250                 dev_warn(adev->dev,
3251                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3252                          adev->asic_type);
3253                 /* fall through */
3254
3255         case CHIP_CARRIZO:
3256                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3257                                 PIPE_CONFIG(ADDR_SURF_P2) |
3258                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3259                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3260                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3261                                 PIPE_CONFIG(ADDR_SURF_P2) |
3262                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3263                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3264                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3265                                 PIPE_CONFIG(ADDR_SURF_P2) |
3266                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3267                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3268                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3269                                 PIPE_CONFIG(ADDR_SURF_P2) |
3270                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3271                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3272                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3273                                 PIPE_CONFIG(ADDR_SURF_P2) |
3274                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3275                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3276                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3277                                 PIPE_CONFIG(ADDR_SURF_P2) |
3278                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3279                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3280                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3281                                 PIPE_CONFIG(ADDR_SURF_P2) |
3282                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3283                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3284                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3285                                 PIPE_CONFIG(ADDR_SURF_P2));
3286                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3287                                 PIPE_CONFIG(ADDR_SURF_P2) |
3288                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3289                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3290                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3291                                  PIPE_CONFIG(ADDR_SURF_P2) |
3292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3294                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3295                                  PIPE_CONFIG(ADDR_SURF_P2) |
3296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3298                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3299                                  PIPE_CONFIG(ADDR_SURF_P2) |
3300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3302                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3303                                  PIPE_CONFIG(ADDR_SURF_P2) |
3304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3306                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3307                                  PIPE_CONFIG(ADDR_SURF_P2) |
3308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3310                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3311                                  PIPE_CONFIG(ADDR_SURF_P2) |
3312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3314                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3315                                  PIPE_CONFIG(ADDR_SURF_P2) |
3316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3318                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3319                                  PIPE_CONFIG(ADDR_SURF_P2) |
3320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3322                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3323                                  PIPE_CONFIG(ADDR_SURF_P2) |
3324                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3325                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3326                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3327                                  PIPE_CONFIG(ADDR_SURF_P2) |
3328                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3329                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3330                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3331                                  PIPE_CONFIG(ADDR_SURF_P2) |
3332                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3333                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3334                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3335                                  PIPE_CONFIG(ADDR_SURF_P2) |
3336                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3337                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3338                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3339                                  PIPE_CONFIG(ADDR_SURF_P2) |
3340                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3341                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3342                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3343                                  PIPE_CONFIG(ADDR_SURF_P2) |
3344                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3345                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3346                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3347                                  PIPE_CONFIG(ADDR_SURF_P2) |
3348                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3349                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3350                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3351                                  PIPE_CONFIG(ADDR_SURF_P2) |
3352                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3353                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3354                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3355                                  PIPE_CONFIG(ADDR_SURF_P2) |
3356                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3357                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3358
3359                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3360                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3361                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3362                                 NUM_BANKS(ADDR_SURF_8_BANK));
3363                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3364                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3365                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3366                                 NUM_BANKS(ADDR_SURF_8_BANK));
3367                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3368                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3369                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3370                                 NUM_BANKS(ADDR_SURF_8_BANK));
3371                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3372                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3373                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3374                                 NUM_BANKS(ADDR_SURF_8_BANK));
3375                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3376                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3377                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3378                                 NUM_BANKS(ADDR_SURF_8_BANK));
3379                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3380                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3381                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3382                                 NUM_BANKS(ADDR_SURF_8_BANK));
3383                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3384                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3385                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3386                                 NUM_BANKS(ADDR_SURF_8_BANK));
3387                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3388                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3389                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3390                                 NUM_BANKS(ADDR_SURF_16_BANK));
3391                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3392                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3393                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3394                                 NUM_BANKS(ADDR_SURF_16_BANK));
3395                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3396                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3397                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3398                                  NUM_BANKS(ADDR_SURF_16_BANK));
3399                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3400                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3401                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3402                                  NUM_BANKS(ADDR_SURF_16_BANK));
3403                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3404                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3405                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3406                                  NUM_BANKS(ADDR_SURF_16_BANK));
3407                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3408                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3409                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3410                                  NUM_BANKS(ADDR_SURF_16_BANK));
3411                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3412                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3413                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3414                                  NUM_BANKS(ADDR_SURF_8_BANK));
3415
3416                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3417                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3418                             reg_offset != 23)
3419                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3420
3421                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3422                         if (reg_offset != 7)
3423                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3424
3425                 break;
3426         }
3427 }
3428
3429 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3430                                   u32 se_num, u32 sh_num, u32 instance)
3431 {
3432         u32 data;
3433
3434         if (instance == 0xffffffff)
3435                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3436         else
3437                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3438
3439         if (se_num == 0xffffffff)
3440                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3441         else
3442                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3443
3444         if (sh_num == 0xffffffff)
3445                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3446         else
3447                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3448
3449         WREG32(mmGRBM_GFX_INDEX, data);
3450 }
3451
/* Route register accesses to a specific ME/pipe/queue/VMID via SRBM selection. */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q, u32 vm)
{
	vi_srbm_select(adev, me, pipe, q, vm);
}
3457
3458 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3459 {
3460         u32 data, mask;
3461
3462         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3463                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3464
3465         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3466
3467         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3468                                          adev->gfx.config.max_sh_per_se);
3469
3470         return (~data) & mask;
3471 }
3472
3473 static void
3474 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3475 {
3476         switch (adev->asic_type) {
3477         case CHIP_FIJI:
3478         case CHIP_VEGAM:
3479                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3480                           RB_XSEL2(1) | PKR_MAP(2) |
3481                           PKR_XSEL(1) | PKR_YSEL(1) |
3482                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3483                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3484                            SE_PAIR_YSEL(2);
3485                 break;
3486         case CHIP_TONGA:
3487         case CHIP_POLARIS10:
3488                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3489                           SE_XSEL(1) | SE_YSEL(1);
3490                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3491                            SE_PAIR_YSEL(2);
3492                 break;
3493         case CHIP_TOPAZ:
3494         case CHIP_CARRIZO:
3495                 *rconf |= RB_MAP_PKR0(2);
3496                 *rconf1 |= 0x0;
3497                 break;
3498         case CHIP_POLARIS11:
3499         case CHIP_POLARIS12:
3500                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3501                           SE_XSEL(1) | SE_YSEL(1);
3502                 *rconf1 |= 0x0;
3503                 break;
3504         case CHIP_STONEY:
3505                 *rconf |= 0x0;
3506                 *rconf1 |= 0x0;
3507                 break;
3508         default:
3509                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3510                 break;
3511         }
3512 }
3513
/*
 * gfx_v8_0_write_harvested_raster_configs - raster config for harvested chips
 *
 * @adev: amdgpu_device pointer
 * @raster_config: default PA_SC_RASTER_CONFIG for a fully populated chip
 * @raster_config_1: default PA_SC_RASTER_CONFIG_1 for a fully populated chip
 * @rb_mask: bitmap of the render backends actually enabled
 * @num_rb: number of RBs a fully populated chip would have
 *
 * Rewrites the SE/PKR/RB mapping fields so rasterizer output is only
 * routed to backends present in @rb_mask, then programs the adjusted
 * value per shader engine.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* slice the global RB bitmap into per-SE masks (up to 4 SEs) */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if one SE pair has no RBs at all, remap SE_PAIR to the other pair */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;	/* index of this SE's pair partner base */

		/* if one SE of the pair is empty, steer SE_MAP to the other */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* likewise for the two packers (PKR0/PKR1) within this SE */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* and for the individual RBs inside each packer */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3622
/*
 * gfx_v8_0_setup_rb - probe active render backends and program raster config
 *
 * Builds the active-RB bitmap from the per-SE/SH backend disable
 * registers, programs PA_SC_RASTER_CONFIG(_1) (via the harvested path
 * when some RBs are missing), and caches the per-SE/SH register values
 * for userspace queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* collect the active-RB bitmap across all shader engines/arrays */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* back to broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* all RBs present (or none detected): broadcast the defaults */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3679
/**
 * gfx_v8_0_init_compute_vmid - init the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM registers of the compute VMIDs (8-15) and clear
 * their GDS/GWS/OA allocations.
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access, cache-coherent default mtype */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
	}
}
3733
3734 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3735 {
3736         int vmid;
3737
3738         /*
3739          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3740          * access. Compute VMIDs should be enabled by FW for target VMIDs,
3741          * the driver can enable them for graphics. VMID0 should maintain
3742          * access so that HWS firmware can save/restore entries.
3743          */
3744         for (vmid = 1; vmid < 16; vmid++) {
3745                 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3746                 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3747                 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3748                 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3749         }
3750 }
3751
3752 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3753 {
3754         switch (adev->asic_type) {
3755         default:
3756                 adev->gfx.config.double_offchip_lds_buf = 1;
3757                 break;
3758         case CHIP_CARRIZO:
3759         case CHIP_STONEY:
3760                 adev->gfx.config.double_offchip_lds_buf = 0;
3761                 break;
3762         }
3763 }
3764
/*
 * gfx_v8_0_constants_init - program the static GFX configuration
 *
 * Programs the address config registers, initializes the tiling tables,
 * render backends and CU info, sets up per-VMID SH_MEM apertures and
 * memory types, then writes a few global registers (PA_SC_FIFO_SIZE,
 * SPI_ARB_PRIORITY) broadcast to all shaders.
 */
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached default/APE1 mtype, zero base */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* other VMIDs: noncoherent default mtype, base taken
			 * from the GMC shared aperture (top 16 bits) */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* APE1 disabled: base > limit */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);
	gfx_v8_0_init_gds_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3847
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes masters to go idle
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY for each SE/SH, then the non-CU
 * master busy bits, giving each poll up to adev->usec_timeout
 * microseconds.  A per-SE/SH timeout is logged and aborts the wait.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* restore broadcast before bailing out */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* now wait for the SE/GC/TC non-CU masters as well */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3885
3886 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3887                                                bool enable)
3888 {
3889         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3890
3891         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3892         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3893         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3894         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3895
3896         WREG32(mmCP_INT_CNTL_RING0, tmp);
3897 }
3898
3899 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3900 {
3901         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3902         /* csib */
3903         WREG32(mmRLC_CSIB_ADDR_HI,
3904                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3905         WREG32(mmRLC_CSIB_ADDR_LO,
3906                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3907         WREG32(mmRLC_CSIB_LENGTH,
3908                         adev->gfx.rlc.clear_state_size);
3909 }
3910
3911 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3912                                 int ind_offset,
3913                                 int list_size,
3914                                 int *unique_indices,
3915                                 int *indices_count,
3916                                 int max_indices,
3917                                 int *ind_start_offsets,
3918                                 int *offset_count,
3919                                 int max_offset)
3920 {
3921         int indices;
3922         bool new_entry = true;
3923
3924         for (; ind_offset < list_size; ind_offset++) {
3925
3926                 if (new_entry) {
3927                         new_entry = false;
3928                         ind_start_offsets[*offset_count] = ind_offset;
3929                         *offset_count = *offset_count + 1;
3930                         BUG_ON(*offset_count >= max_offset);
3931                 }
3932
3933                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3934                         new_entry = true;
3935                         continue;
3936                 }
3937
3938                 ind_offset += 2;
3939
3940                 /* look for the matching indice */
3941                 for (indices = 0;
3942                         indices < *indices_count;
3943                         indices++) {
3944                         if (unique_indices[indices] ==
3945                                 register_list_format[ind_offset])
3946                                 break;
3947                 }
3948
3949                 if (indices >= *indices_count) {
3950                         unique_indices[*indices_count] =
3951                                 register_list_format[ind_offset];
3952                         indices = *indices_count;
3953                         *indices_count = *indices_count + 1;
3954                         BUG_ON(*indices_count >= max_indices);
3955                 }
3956
3957                 register_list_format[ind_offset] = indices;
3958         }
3959 }
3960
/*
 * gfx_v8_0_init_save_restore_list - upload the RLC save/restore lists
 *
 * Parses the indirect register list shipped with the RLC firmware,
 * uploads the direct register restore list to SRM ARAM and the
 * rewritten indirect list plus per-entry start offsets to GPM scratch,
 * then programs the unique index control registers.
 *
 * Returns 0 on success, -ENOMEM if the list copy cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parsing rewrites index values in place */
	unsigned int *register_list_format =
		kmemdup(adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* dword count halved — the list appears to be consumed as pairs */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4023
/* Enable the RLC save/restore machine (SRM), which saves and restores
 * GPU register state using the lists programmed by
 * gfx_v8_0_init_save_restore_list().
 */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4028
/* Program the power-gating delay and idle-threshold registers with the
 * driver's recommended values; called before PG features are enabled.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* delays applied around the power up/down sequencing steps */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	/* how long gfx must be idle before GRBM registers are saved */
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4045
4046 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4047                                                 bool enable)
4048 {
4049         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4050 }
4051
4052 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4053                                                   bool enable)
4054 {
4055         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4056 }
4057
4058 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4059 {
4060         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4061 }
4062
4063 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4064 {
4065         if ((adev->asic_type == CHIP_CARRIZO) ||
4066             (adev->asic_type == CHIP_STONEY)) {
4067                 gfx_v8_0_init_csb(adev);
4068                 gfx_v8_0_init_save_restore_list(adev);
4069                 gfx_v8_0_enable_save_restore_machine(adev);
4070                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4071                 gfx_v8_0_init_power_gating(adev);
4072                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4073         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4074                    (adev->asic_type == CHIP_POLARIS12) ||
4075                    (adev->asic_type == CHIP_VEGAM)) {
4076                 gfx_v8_0_init_csb(adev);
4077                 gfx_v8_0_init_save_restore_list(adev);
4078                 gfx_v8_0_enable_save_restore_machine(adev);
4079                 gfx_v8_0_init_power_gating(adev);
4080         }
4081
4082 }
4083
/* Halt the RLC microcontroller, mask the GUI idle interrupt and wait for
 * the RLC serdes to quiesce before callers touch RLC state.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4091
/* Pulse the RLC soft reset: assert, settle, deassert, settle.  The 50 us
 * delays give the hardware time to latch each transition.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4100
/* Start the RLC microcontroller and, on dGPUs, re-enable the GUI idle
 * interrupt.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4111
/* Bring the RLC back up: stop, reset, re-init power gating, start.
 * Always returns 0.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	/* Under SR-IOV the host owns the RLC; the guest only reloads
	 * the clear-state buffer address.
	 */
	if (amdgpu_sriov_vf(adev)) {
		gfx_v8_0_init_csb(adev);
		return 0;
	}

	adev->gfx.rlc.funcs->stop(adev);
	adev->gfx.rlc.funcs->reset(adev);
	gfx_v8_0_init_pg(adev);
	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
4126
4127 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4128 {
4129         u32 tmp = RREG32(mmCP_ME_CNTL);
4130
4131         if (enable) {
4132                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4133                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4134                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4135         } else {
4136                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4137                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4138                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4139         }
4140         WREG32(mmCP_ME_CNTL, tmp);
4141         udelay(50);
4142 }
4143
4144 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4145 {
4146         u32 count = 0;
4147         const struct cs_section_def *sect = NULL;
4148         const struct cs_extent_def *ext = NULL;
4149
4150         /* begin clear state */
4151         count += 2;
4152         /* context control state */
4153         count += 3;
4154
4155         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4156                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4157                         if (sect->id == SECT_CONTEXT)
4158                                 count += 2 + ext->reg_count;
4159                         else
4160                                 return 0;
4161                 }
4162         }
4163         /* pa_sc_raster_config/pa_sc_raster_config1 */
4164         count += 4;
4165         /* end clear state */
4166         count += 2;
4167         /* clear state */
4168         count += 2;
4169
4170         return count;
4171 }
4172
/* Initialize the gfx CP and emit the clear-state / CE-partition init PM4
 * stream on gfx ring 0.  The stream layout must match the dword count
 * computed by gfx_v8_0_get_csb_size().
 *
 * Returns 0 on success or a negative error code if the ring could not be
 * allocated.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 covers the trailing SET_BASE packet for the CE partitions */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* replay the golden context register values */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* raster config from the harvested RB configuration */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/* Configure the CP gfx ring-buffer doorbell: enable/disable it for @ring
 * and, on dGPUs, program the doorbell range aperture.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* the range registers below are only programmed on dGPUs */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					adev->doorbell_index.gfx_ring0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4269
/* Program and start the gfx ring buffer (CP_RB0): size, pointers,
 * writeback addresses, base address and doorbell, then kick off the
 * clear-state stream via gfx_v8_0_cp_gfx_start().  Always returns 0.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* let the pointer reset settle before dropping RB_RPTR_WR_ENA */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->sched.ready = true;

	return 0;
}
4323
/* Un-halt (enable) or halt both MEC micro engines.  When halting, the
 * KIQ ring is marked not-ready so the scheduler stops submitting to it.
 */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		adev->gfx.kiq.ring.sched.ready = false;
	}
	udelay(50);
}
4334
4335 /* KIQ functions */
4336 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4337 {
4338         uint32_t tmp;
4339         struct amdgpu_device *adev = ring->adev;
4340
4341         /* tell RLC which is KIQ queue */
4342         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4343         tmp &= 0xffffff00;
4344         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4345         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4346         tmp |= 0x80;
4347         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4348 }
4349
/* Use the KIQ to publish the set of hardware compute queues the driver
 * owns (SET_RESOURCES) and to map every compute ring's MQD into an HQD
 * (one MAP_QUEUES packet per ring).
 *
 * Returns 0 on success or a negative error code if the KIQ ring could
 * not be locked.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	/* build a bitmask of all MEC queues allocated to the kernel driver */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	/* 8 dwords for SET_RESOURCES + 8 dwords of MAP_QUEUES per ring */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	amdgpu_ring_commit(kiq_ring);

	return 0;
}
4410
/* Request dequeue (@req is the DEQUEUE_REQ code) of the currently
 * selected HQD and busy-wait up to adev->usec_timeout microseconds for
 * it to go inactive, then clear the dequeue request and PQ pointers.
 * The caller is expected to have selected the target queue via
 * vi_srbm_select() beforehand.
 *
 * Returns 0 on success or -ETIMEDOUT if the HQD stayed active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	/* pointers are reset unconditionally, even on timeout */
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4431
4432 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4433 {
4434         struct amdgpu_device *adev = ring->adev;
4435
4436         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4437                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
4438                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4439                         mqd->cp_hqd_queue_priority =
4440                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4441                 }
4442         }
4443 }
4444
/* Fill in the MQD (memory queue descriptor) for @ring with the values
 * that gfx_v8_0_mqd_commit() later writes into the HQD registers.  The
 * hardware itself is only read here (for register defaults), not
 * programmed.  Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* GPU address of the dynamic CU mask stored alongside the MQD */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot the remaining HQD registers as-is */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* set static priority for a queue/ring */
	gfx_v8_0_mqd_set_priority(ring, mqd);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

	/* map_queues packet doesn't need activate the queue,
	 * so only kiq need set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}
4591
/* Write an MQD image into the currently selected HQD registers and
 * activate the queue.  The callers in this file hold srbm_mutex and have
 * selected the target me/pipe/queue via vi_srbm_select() first.
 * Register write order matters: CP_HQD_ACTIVE is programmed last.
 * Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4628
/* Initialize the KIQ queue.  On a fresh start, build the MQD from scratch
 * and keep a backup copy; on GPU reset, restore the MQD from the backup
 * instead of re-deriving it.  Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* KIQ backup lives in the slot after all compute-ring backups */
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a pristine copy for later GPU-reset restore */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4667
/* Initialize a compute queue MQD.  Fresh start: build the MQD and back it
 * up; GPU reset: restore from the backup; suspend/resume: just clear the
 * ring (the MQD is preserved).  Unlike the KIQ, the HQD is not committed
 * here -- mapping is done later via KIQ MAP_QUEUES packets.
 * Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot index == compute ring index */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4698
/* Program the MEC doorbell aperture and globally enable CP doorbells.
 * The range registers are only written on ASICs newer than Tonga (relies
 * on amd_asic_type enum ordering).
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4708
4709 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4710 {
4711         struct amdgpu_ring *ring;
4712         int r;
4713
4714         ring = &adev->gfx.kiq.ring;
4715
4716         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4717         if (unlikely(r != 0))
4718                 return r;
4719
4720         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4721         if (unlikely(r != 0))
4722                 return r;
4723
4724         gfx_v8_0_kiq_init_queue(ring);
4725         amdgpu_bo_kunmap(ring->mqd_obj);
4726         ring->mqd_ptr = NULL;
4727         amdgpu_bo_unreserve(ring->mqd_obj);
4728         ring->sched.ready = true;
4729         return 0;
4730 }
4731
4732 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4733 {
4734         struct amdgpu_ring *ring = NULL;
4735         int r = 0, i;
4736
4737         gfx_v8_0_cp_compute_enable(adev, true);
4738
4739         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4740                 ring = &adev->gfx.compute_ring[i];
4741
4742                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4743                 if (unlikely(r != 0))
4744                         goto done;
4745                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4746                 if (!r) {
4747                         r = gfx_v8_0_kcq_init_queue(ring);
4748                         amdgpu_bo_kunmap(ring->mqd_obj);
4749                         ring->mqd_ptr = NULL;
4750                 }
4751                 amdgpu_bo_unreserve(ring->mqd_obj);
4752                 if (r)
4753                         goto done;
4754         }
4755
4756         gfx_v8_0_set_mec_doorbell_range(adev);
4757
4758         r = gfx_v8_0_kiq_kcq_enable(adev);
4759         if (r)
4760                 goto done;
4761
4762 done:
4763         return r;
4764 }
4765
/* Ring-test the GFX ring, the KIQ ring and every compute ring.
 *
 * A failure on the GFX or KIQ ring aborts with its error code; the
 * compute-ring results are deliberately not propagated here —
 * NOTE(review): presumably amdgpu_ring_test_helper() marks a failing ring
 * unusable on its own so a compute failure is non-fatal for bring-up;
 * confirm against the helper's implementation.
 */
static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	/* collect all the ring_tests here, gfx, kiq, compute */
	ring = &adev->gfx.gfx_ring[0];
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	ring = &adev->gfx.kiq.ring;
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		amdgpu_ring_test_helper(ring);
	}

	return 0;
}
4789
/* Resume the whole command processor: KIQ first, then the GFX ring, then
 * the compute queues, and finally ring-test everything.  The ordering is
 * intentional — the KIQ must be alive before the KCQs can be mapped.
 * Returns 0 on success or the first error encountered.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	/* keep GUI-idle interrupts quiet during bring-up on dGPUs */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kcq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_test_all_rings(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
4817
/* Enable or disable both CP micro engines (gfx and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4823
4824 static int gfx_v8_0_hw_init(void *handle)
4825 {
4826         int r;
4827         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4828
4829         gfx_v8_0_init_golden_registers(adev);
4830         gfx_v8_0_constants_init(adev);
4831
4832         r = adev->gfx.rlc.funcs->resume(adev);
4833         if (r)
4834                 return r;
4835
4836         r = gfx_v8_0_cp_resume(adev);
4837
4838         return r;
4839 }
4840
4841 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4842 {
4843         int r, i;
4844         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4845
4846         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4847         if (r)
4848                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4849
4850         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4851                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4852
4853                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4854                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4855                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4856                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4857                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4858                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4859                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4860                 amdgpu_ring_write(kiq_ring, 0);
4861                 amdgpu_ring_write(kiq_ring, 0);
4862                 amdgpu_ring_write(kiq_ring, 0);
4863         }
4864         r = amdgpu_ring_test_helper(kiq_ring);
4865         if (r)
4866                 DRM_ERROR("KCQ disable failed\n");
4867
4868         return r;
4869 }
4870
4871 static bool gfx_v8_0_is_idle(void *handle)
4872 {
4873         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4874
4875         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4876                 || RREG32(mmGRBM_STATUS2) != 0x8)
4877                 return false;
4878         else
4879                 return true;
4880 }
4881
4882 static bool gfx_v8_0_rlc_is_idle(void *handle)
4883 {
4884         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4885
4886         if (RREG32(mmGRBM_STATUS2) != 0x8)
4887                 return false;
4888         else
4889                 return true;
4890 }
4891
4892 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4893 {
4894         unsigned int i;
4895         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4896
4897         for (i = 0; i < adev->usec_timeout; i++) {
4898                 if (gfx_v8_0_rlc_is_idle(handle))
4899                         return 0;
4900
4901                 udelay(1);
4902         }
4903         return -ETIMEDOUT;
4904 }
4905
4906 static int gfx_v8_0_wait_for_idle(void *handle)
4907 {
4908         unsigned int i;
4909         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4910
4911         for (i = 0; i < adev->usec_timeout; i++) {
4912                 if (gfx_v8_0_is_idle(handle))
4913                         return 0;
4914
4915                 udelay(1);
4916         }
4917         return -ETIMEDOUT;
4918 }
4919
/* IP-block hw_fini hook: release interrupts, unmap the compute queues,
 * then halt CP and RLC (unless running as an SR-IOV guest, where the
 * host owns the hardware teardown).  The teardown order is deliberate:
 * KCQs are unmapped before the CPC is halted so it no longer touches
 * freed memory, and the CP is stopped before the RLC.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	gfx_v8_0_kcq_disable(adev);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	amdgpu_gfx_rlc_enter_safe_mode(adev);
	/* only halt engines that have actually drained; a busy engine is
	 * left running and the condition is reported instead
	 */
	if (!gfx_v8_0_wait_for_idle(adev))
		gfx_v8_0_cp_enable(adev, false);
	else
		pr_err("cp is busy, skip halt cp\n");
	if (!gfx_v8_0_wait_for_rlc_idle(adev))
		adev->gfx.rlc.funcs->stop(adev);
	else
		pr_err("rlc is busy, skip halt rlc\n");
	amdgpu_gfx_rlc_exit_safe_mode(adev);

	return 0;
}
4951
/* Suspend is just a full hardware teardown of the GFX block. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4956
/* Resume is just a full hardware re-initialization of the GFX block. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4961
/* Decode the GRBM/SRBM status registers into the soft-reset masks that a
 * subsequent soft reset would need, caching them in adev->gfx.  Returns
 * true when any engine is stuck busy (i.e. a soft reset is required).
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	/* any busy graphics pipeline stage -> reset CP + GFX (+ GRBM) */
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any busy CP micro engine (fetcher/compute/gfx) -> reset all three */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* cache the masks for pre/post_soft_reset, clearing stale state */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5023
5024 static int gfx_v8_0_pre_soft_reset(void *handle)
5025 {
5026         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5027         u32 grbm_soft_reset = 0;
5028
5029         if ((!adev->gfx.grbm_soft_reset) &&
5030             (!adev->gfx.srbm_soft_reset))
5031                 return 0;
5032
5033         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5034
5035         /* stop the rlc */
5036         adev->gfx.rlc.funcs->stop(adev);
5037
5038         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5039             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5040                 /* Disable GFX parsing/prefetching */
5041                 gfx_v8_0_cp_gfx_enable(adev, false);
5042
5043         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5044             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5045             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5046             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5047                 int i;
5048
5049                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5050                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5051
5052                         mutex_lock(&adev->srbm_mutex);
5053                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5054                         gfx_v8_0_deactivate_hqd(adev, 2);
5055                         vi_srbm_select(adev, 0, 0, 0, 0);
5056                         mutex_unlock(&adev->srbm_mutex);
5057                 }
5058                 /* Disable MEC parsing/prefetching */
5059                 gfx_v8_0_cp_compute_enable(adev, false);
5060         }
5061
5062        return 0;
5063 }
5064
/* Pulse the GRBM/SRBM soft-reset bits cached by check_soft_reset.
 *
 * The GMCON_DEBUG GFX_STALL/GFX_CLEAR bits bracket the reset to stall the
 * memory controller's GFX traffic while the reset is in flight.  Each
 * WREG32 of a *_SOFT_RESET register is followed by an RREG32 of the same
 * register — a posting read to make sure the write has landed before the
 * following delay.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall GFX traffic in the memory controller during the reset */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		/* de-assert the same bits to release the engines */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the memory-controller stall */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5126
/* Re-initialize the engines that were taken down around a soft reset:
 * deactivate any leftover HQD state per compute ring, resume KIQ/KCQ if
 * the compute side was reset, resume the GFX ring if the GFX side was
 * reset, ring-test everything and restart the RLC last.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			/* HQD registers are banked per queue; select under the mutex */
			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
		gfx_v8_0_kcq_resume(adev);
	}

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	gfx_v8_0_cp_test_all_rings(adev);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
5167
5168 /**
5169  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5170  *
5171  * @adev: amdgpu_device pointer
5172  *
5173  * Fetches a GPU clock counter snapshot.
5174  * Returns the 64 bit clock counter snapshot.
5175  */
5176 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5177 {
5178         uint64_t clock;
5179
5180         mutex_lock(&adev->gfx.gpu_clock_mutex);
5181         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5182         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5183                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5184         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5185         return clock;
5186 }
5187
5188 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5189                                           uint32_t vmid,
5190                                           uint32_t gds_base, uint32_t gds_size,
5191                                           uint32_t gws_base, uint32_t gws_size,
5192                                           uint32_t oa_base, uint32_t oa_size)
5193 {
5194         /* GDS Base */
5195         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5196         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5197                                 WRITE_DATA_DST_SEL(0)));
5198         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5199         amdgpu_ring_write(ring, 0);
5200         amdgpu_ring_write(ring, gds_base);
5201
5202         /* GDS Size */
5203         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5204         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5205                                 WRITE_DATA_DST_SEL(0)));
5206         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5207         amdgpu_ring_write(ring, 0);
5208         amdgpu_ring_write(ring, gds_size);
5209
5210         /* GWS */
5211         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5212         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5213                                 WRITE_DATA_DST_SEL(0)));
5214         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5215         amdgpu_ring_write(ring, 0);
5216         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5217
5218         /* OA */
5219         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5220         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5221                                 WRITE_DATA_DST_SEL(0)));
5222         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5223         amdgpu_ring_write(ring, 0);
5224         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5225 }
5226
5227 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5228 {
5229         WREG32(mmSQ_IND_INDEX,
5230                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5231                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5232                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5233                 (SQ_IND_INDEX__FORCE_READ_MASK));
5234         return RREG32(mmSQ_IND_DATA);
5235 }
5236
5237 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5238                            uint32_t wave, uint32_t thread,
5239                            uint32_t regno, uint32_t num, uint32_t *out)
5240 {
5241         WREG32(mmSQ_IND_INDEX,
5242                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5243                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5244                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5245                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5246                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5247                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5248         while (num--)
5249                 *(out++) = RREG32(mmSQ_IND_DATA);
5250 }
5251
5252 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5253 {
5254         /* type 0 wave data */
5255         dst[(*no_fields)++] = 0;
5256         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5257         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5258         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5259         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5260         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5261         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5262         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5263         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5264         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5265         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5266         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5267         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5268         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5269         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5270         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5271         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5272         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5273         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5274 }
5275
5276 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5277                                      uint32_t wave, uint32_t start,
5278                                      uint32_t size, uint32_t *dst)
5279 {
5280         wave_read_regs(
5281                 adev, simd, wave, 0,
5282                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5283 }
5284
5285
/* GFX helper callbacks (clock counter, SE/SH selection, wave debug reads)
 * exported to the core driver through adev->gfx.funcs.
 */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5293
/* IP-block early_init hook: set ring counts and wire up the function
 * tables (gfx helpers, ring/irq/gds/rlc callbacks) before any hardware
 * access happens.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5308
/* IP-block late_init hook: enable the GFX interrupt sources and run the
 * EDC GPR workarounds.  Runs late because the workarounds submit IBs and
 * therefore need the IB pool to exist.  Returns 0 or the first error.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	return 0;
}
5343
/* Toggle static per-CU medium-grain power gating.
 *
 * On Polaris11/12 and VegaM the SMU must also be told (when the powerplay
 * callback exists); on all ASICs the RLC_PG_CNTL bit is flipped directly.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if (((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12) ||
	    (adev->asic_type == CHIP_VEGAM)) &&
	    adev->powerplay.pp_funcs->set_powergating_by_smu)
		/* Send msg to SMU via Powerplay */
		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5356
/* Toggle dynamic per-CU medium-grain power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5362
/* Toggle quick medium-grain power gating (Polaris11 family) via RLC_PG_CNTL. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5368
/* Toggle coarse-grain GFX power gating (Carrizo family) via RLC_PG_CNTL. */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5374
/* Toggle GFX pipeline power gating (Carrizo family) via RLC_PG_CNTL.
 * When disabling, a dummy register read forces the GFX block awake.
 */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5384
/* Apply Carrizo coarse-grain/pipeline power gating per the pg_flags.
 *
 * Note the branch asymmetry: when enabling, pipeline gating is only
 * touched if AMD_PG_SUPPORT_GFX_PIPELINE is set; when disabling, both
 * gates are always forced off.
 */
static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}
5397
/* IP-block set_powergating_state hook: apply per-ASIC GFX power-gating
 * settings for the requested state (GATE enables the gates supported in
 * pg_flags; UNGATE forces them all off).  A no-op under SR-IOV.  The
 * RLC is put in safe mode around the register updates when any of the
 * affected gating features is supported.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down is tied to RLC/SMU handshake support */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		/* other VI ASICs have no GFX power gating to program */
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_exit_safe_mode(adev);
	return 0;
}
5468
/* Report the clock-gating features currently active in hardware by
 * decoding the relevant RLC/CGTS/CP registers into AMD_CG_SUPPORT_* bits
 * OR-ed into *flags.  Under SR-IOV *flags is first cleared — NOTE(review):
 * the register reads still run in that case; presumably harmless for a VF,
 * confirm against other gfx_v*_get_clockgating_state implementations.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5510
/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM serdes command to all CUs
 * @reg_addr: BPM register offset to target (a BPM_REG_* value)
 * @cmd: serdes command to issue (e.g. SET/CLE_BPM_SERDES_CMD)
 *
 * Programs RLC_SERDES_WR_CTRL so the RLC pushes @cmd at @reg_addr to
 * every BPM in the selected shader engines.  Leaves the SE/SH
 * selection at broadcast (0xffffffff) on return.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast: address every SE/SH and every CU/non-CU master */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		/* Stoney variant preserves BPM_DATA/REG_ADDR fields */
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* encode command, target register and BPM address (0xff = all) */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5551
/* RLC safe-mode handshake messages and RLC_GPR_REG2 field layout.
 * NOTE(review): the RLC_GPR_REG2 REQ/MESSAGE definitions appear unused
 * in the visible code (safe mode is driven via mmRLC_SAFE_MODE below)
 * — confirm before removing. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5558
5559 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5560 {
5561         uint32_t rlc_setting;
5562
5563         rlc_setting = RREG32(mmRLC_CNTL);
5564         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5565                 return false;
5566
5567         return true;
5568 }
5569
/*
 * gfx_v8_0_set_safe_mode - request RLC safe mode and wait for entry
 *
 * Writes CMD=1/MESSAGE=1 to RLC_SAFE_MODE, then polls (up to
 * adev->usec_timeout microseconds) for GFX clock and power status to
 * report on, and finally for the RLC to acknowledge by clearing CMD.
 * NOTE(review): a poll timeout is silently ignored — the function has
 * no return value, matching its unset counterpart below.
 */
static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;
	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);	/* enter request */
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPM_STAT) &
		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
			break;
		udelay(1);
	}
	/* RLC clears CMD once the request has been consumed */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5596
/*
 * gfx_v8_0_unset_safe_mode - release RLC safe mode
 *
 * Writes CMD=1 with MESSAGE=0 (exit) to RLC_SAFE_MODE and polls until
 * the RLC acknowledges by clearing CMD, or adev->usec_timeout elapses.
 */
static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;	/* MESSAGE=0: exit request */
	WREG32(mmRLC_SAFE_MODE, data);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5613
5614 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5615 {
5616         u32 data;
5617
5618         if (amdgpu_sriov_is_pp_one_vf(adev))
5619                 data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5620         else
5621                 data = RREG32(mmRLC_SPM_VMID);
5622
5623         data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5624         data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5625
5626         if (amdgpu_sriov_is_pp_one_vf(adev))
5627                 WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5628         else
5629                 WREG32(mmRLC_SPM_VMID, data);
5630 }
5631
/* RLC callback table shared by all VI parts handled by this file. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
	.set_safe_mode = gfx_v8_0_set_safe_mode,
	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
	.init = gfx_v8_0_rlc_init,
	.get_csb_size = gfx_v8_0_get_csb_size,
	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
	.resume = gfx_v8_0_rlc_resume,
	.stop = gfx_v8_0_rlc_stop,
	.reset = gfx_v8_0_rlc_reset,
	.start = gfx_v8_0_rlc_start,
	.update_spm_vmid = gfx_v8_0_update_spm_vmid
};
5646
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 * @enable: true to enable the gating features allowed by adev->cg_flags
 *
 * Runs the numbered enable/disable sequences under RLC safe mode.  The
 * step ordering (override registers, serdes idle waits, BPM commands)
 * is hardware-mandated; do not reorder.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear overrides to allow MGCG */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			/* APUs keep the GRBM override set */
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5750
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @enable: true to enable coarse-grain gating per adev->cg_flags
 *
 * Runs under RLC safe mode.  The override-clear / serdes-idle / BPM
 * command ordering is hardware-mandated; do not reorder.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG: set both CGCG and CGLS overrides */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg (reads repeated on purpose) */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5843 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5844                                             bool enable)
5845 {
5846         if (enable) {
5847                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5848                  * ===  MGCG + MGLS + TS(CG/LS) ===
5849                  */
5850                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5851                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5852         } else {
5853                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5854                  * ===  CGCG + CGLS ===
5855                  */
5856                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5857                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5858         }
5859         return 0;
5860 }
5861
5862 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5863                                           enum amd_clockgating_state state)
5864 {
5865         uint32_t msg_id, pp_state = 0;
5866         uint32_t pp_support_state = 0;
5867
5868         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5869                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5870                         pp_support_state = PP_STATE_SUPPORT_LS;
5871                         pp_state = PP_STATE_LS;
5872                 }
5873                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5874                         pp_support_state |= PP_STATE_SUPPORT_CG;
5875                         pp_state |= PP_STATE_CG;
5876                 }
5877                 if (state == AMD_CG_STATE_UNGATE)
5878                         pp_state = 0;
5879
5880                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5881                                 PP_BLOCK_GFX_CG,
5882                                 pp_support_state,
5883                                 pp_state);
5884                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5885                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5886         }
5887
5888         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5889                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5890                         pp_support_state = PP_STATE_SUPPORT_LS;
5891                         pp_state = PP_STATE_LS;
5892                 }
5893
5894                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5895                         pp_support_state |= PP_STATE_SUPPORT_CG;
5896                         pp_state |= PP_STATE_CG;
5897                 }
5898
5899                 if (state == AMD_CG_STATE_UNGATE)
5900                         pp_state = 0;
5901
5902                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5903                                 PP_BLOCK_GFX_MG,
5904                                 pp_support_state,
5905                                 pp_state);
5906                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5907                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5908         }
5909
5910         return 0;
5911 }
5912
5913 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5914                                           enum amd_clockgating_state state)
5915 {
5916
5917         uint32_t msg_id, pp_state = 0;
5918         uint32_t pp_support_state = 0;
5919
5920         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5921                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5922                         pp_support_state = PP_STATE_SUPPORT_LS;
5923                         pp_state = PP_STATE_LS;
5924                 }
5925                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5926                         pp_support_state |= PP_STATE_SUPPORT_CG;
5927                         pp_state |= PP_STATE_CG;
5928                 }
5929                 if (state == AMD_CG_STATE_UNGATE)
5930                         pp_state = 0;
5931
5932                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5933                                 PP_BLOCK_GFX_CG,
5934                                 pp_support_state,
5935                                 pp_state);
5936                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5937                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5938         }
5939
5940         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5941                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5942                         pp_support_state = PP_STATE_SUPPORT_LS;
5943                         pp_state = PP_STATE_LS;
5944                 }
5945                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5946                         pp_support_state |= PP_STATE_SUPPORT_CG;
5947                         pp_state |= PP_STATE_CG;
5948                 }
5949                 if (state == AMD_CG_STATE_UNGATE)
5950                         pp_state = 0;
5951
5952                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5953                                 PP_BLOCK_GFX_3D,
5954                                 pp_support_state,
5955                                 pp_state);
5956                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5957                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5958         }
5959
5960         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5961                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5962                         pp_support_state = PP_STATE_SUPPORT_LS;
5963                         pp_state = PP_STATE_LS;
5964                 }
5965
5966                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5967                         pp_support_state |= PP_STATE_SUPPORT_CG;
5968                         pp_state |= PP_STATE_CG;
5969                 }
5970
5971                 if (state == AMD_CG_STATE_UNGATE)
5972                         pp_state = 0;
5973
5974                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5975                                 PP_BLOCK_GFX_MG,
5976                                 pp_support_state,
5977                                 pp_state);
5978                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5979                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5980         }
5981
5982         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5983                 pp_support_state = PP_STATE_SUPPORT_LS;
5984
5985                 if (state == AMD_CG_STATE_UNGATE)
5986                         pp_state = 0;
5987                 else
5988                         pp_state = PP_STATE_LS;
5989
5990                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5991                                 PP_BLOCK_GFX_RLC,
5992                                 pp_support_state,
5993                                 pp_state);
5994                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5995                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5996         }
5997
5998         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5999                 pp_support_state = PP_STATE_SUPPORT_LS;
6000
6001                 if (state == AMD_CG_STATE_UNGATE)
6002                         pp_state = 0;
6003                 else
6004                         pp_state = PP_STATE_LS;
6005                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6006                         PP_BLOCK_GFX_CP,
6007                         pp_support_state,
6008                         pp_state);
6009                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6010                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6011         }
6012
6013         return 0;
6014 }
6015
6016 static int gfx_v8_0_set_clockgating_state(void *handle,
6017                                           enum amd_clockgating_state state)
6018 {
6019         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6020
6021         if (amdgpu_sriov_vf(adev))
6022                 return 0;
6023
6024         switch (adev->asic_type) {
6025         case CHIP_FIJI:
6026         case CHIP_CARRIZO:
6027         case CHIP_STONEY:
6028                 gfx_v8_0_update_gfx_clock_gating(adev,
6029                                                  state == AMD_CG_STATE_GATE);
6030                 break;
6031         case CHIP_TONGA:
6032                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6033                 break;
6034         case CHIP_POLARIS10:
6035         case CHIP_POLARIS11:
6036         case CHIP_POLARIS12:
6037         case CHIP_VEGAM:
6038                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6039                 break;
6040         default:
6041                 break;
6042         }
6043         return 0;
6044 }
6045
6046 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6047 {
6048         return ring->adev->wb.wb[ring->rptr_offs];
6049 }
6050
6051 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6052 {
6053         struct amdgpu_device *adev = ring->adev;
6054
6055         if (ring->use_doorbell)
6056                 /* XXX check if swapping is necessary on BE */
6057                 return ring->adev->wb.wb[ring->wptr_offs];
6058         else
6059                 return RREG32(mmCP_RB0_WPTR);
6060 }
6061
6062 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6063 {
6064         struct amdgpu_device *adev = ring->adev;
6065
6066         if (ring->use_doorbell) {
6067                 /* XXX check if swapping is necessary on BE */
6068                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6069                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6070         } else {
6071                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6072                 (void)RREG32(mmCP_RB0_WPTR);
6073         }
6074 }
6075
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the engine-specific done bit matches.
 * Compute/KIQ rings select their done bit from ME/pipe; the GFX ring
 * uses CP0 and waits on the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* no flush bit defined for other MEs */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);	/* reference value */
	amdgpu_ring_write(ring, ref_and_mask);	/* compare mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6108
/*
 * gfx_v8_0_ring_emit_vgt_flush - emit a VGT flush on the ring
 *
 * Emits a VS partial flush followed by a VGT_FLUSH event to drain the
 * vertex/geometry pipeline.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6119
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the GFX ring
 * @job: job owning the IB (supplies the VMID), may be NULL
 * @ib: indirect buffer to schedule
 * @flags: AMDGPU_IB_* submission flags
 *
 * Emits INDIRECT_BUFFER_CONST for constant-engine IBs, otherwise
 * INDIRECT_BUFFER.  For preemptible SRIOV submissions the DE metadata
 * is emitted ahead of the IB.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
					struct amdgpu_job *job,
					struct amdgpu_ib *ib,
					uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* size in dwords plus the VMID in bits 31:24 */
	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* byte-swap IB fetch on BE */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6151
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring
 * @job: job owning the IB (supplies the VMID), may be NULL
 * @ib: indirect buffer to schedule
 * @flags: AMDGPU_IB_* submission flags
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |	/* byte-swap IB fetch on BE */
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6185
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence write on the GFX ring
 * @addr: GPU address to write the fence sequence number to
 * @seq: sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_64BIT for a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT to raise an interrupt
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));	/* 32-bit, no irq */
	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
	amdgpu_ring_write(ring, upper_32_bits(seq - 1));

	/* Then send the real EOP event down the pipe:
	 * EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6222
/* Emit a pipeline sync: WAIT_REG_MEM polls the ring's fence memory until it
 * equals the last synced sequence number, stalling the fetcher (PFP on gfx,
 * ME on compute) until previously submitted work has signalled.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;

        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
                                 WAIT_REG_MEM_FUNCTION(3) | /* equal */
                                 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
        amdgpu_ring_write(ring, seq); /* reference value */
        amdgpu_ring_write(ring, 0xffffffff); /* compare mask: all bits */
        amdgpu_ring_write(ring, 4); /* poll interval */
}
6239
/* Emit a VM TLB flush for @vmid pointing at page directory @pd_addr, then
 * wait for the invalidation to land and (on gfx) resynchronize PFP with ME.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                        unsigned vmid, uint64_t pd_addr)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

        /* common GMC helper emits the page-table base update + invalidate */
        amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

        /* wait for the invalidate to complete */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
                                 WAIT_REG_MEM_ENGINE(0))); /* me */
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0); /* ref */
        amdgpu_ring_write(ring, 0); /* mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */

        /* compute doesn't have PFP */
        if (usepfp) {
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
        }
}
6265
6266 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6267 {
6268         return ring->adev->wb.wb[ring->wptr_offs];
6269 }
6270
6271 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6272 {
6273         struct amdgpu_device *adev = ring->adev;
6274
6275         /* XXX check if swapping is necessary on BE */
6276         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6277         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6278 }
6279
/* Emit a fence on a compute ring via a RELEASE_MEM packet: flush caches,
 * write @seq to @addr, optionally raise an interrupt.
 *
 * @addr:  GPU address the fence sequence number is written to
 * @seq:   fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT writes the full 64-bit seq,
 *         AMDGPU_FENCE_FLAG_INT requests a completion interrupt
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                             u64 addr, u64 seq,
                                             unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));
}
6300
/* Emit a fence on the KIQ ring: WRITE_DATA the 32-bit @seq to @addr and,
 * when requested, poke CPC_INT_STATUS to trigger the completion interrupt.
 * 64-bit fences are not supported here (only 32 bits of writeback space
 * are allocated per seq).
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned int flags)
{
        /* we only allocate 32bit for each seq wb address */
        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        /* write fence seq to the "addr" */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));

        if (flags & AMDGPU_FENCE_FLAG_INT) {
                /* set register to trigger INT */
                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
                amdgpu_ring_write(ring, 0);
                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
        }
}
6325
/* Emit a SWITCH_BUFFER packet on the gfx ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
        amdgpu_ring_write(ring, 0);
}
6331
6332 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6333 {
6334         uint32_t dw2 = 0;
6335
6336         if (amdgpu_sriov_vf(ring->adev))
6337                 gfx_v8_0_ring_emit_ce_meta(ring);
6338
6339         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6340         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6341                 gfx_v8_0_ring_emit_vgt_flush(ring);
6342                 /* set load_global_config & load_global_uconfig */
6343                 dw2 |= 0x8001;
6344                 /* set load_cs_sh_regs */
6345                 dw2 |= 0x01000000;
6346                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6347                 dw2 |= 0x10002;
6348
6349                 /* set load_ce_ram if preamble presented */
6350                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6351                         dw2 |= 0x10000000;
6352         } else {
6353                 /* still load_ce_ram if this is the first time preamble presented
6354                  * although there is no context switch happens.
6355                  */
6356                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6357                         dw2 |= 0x10000000;
6358         }
6359
6360         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6361         amdgpu_ring_write(ring, dw2);
6362         amdgpu_ring_write(ring, 0);
6363 }
6364
/* Emit a COND_EXEC packet and return the ring offset of its patchable
 * "number of DWs to skip" field. The field is filled with a dummy value
 * (0x55aa55aa) here and fixed up later by
 * gfx_v8_0_ring_emit_patch_cond_exec() once the skip length is known.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
        unsigned ret;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
        /* remember where the patchable DW sits, before writing the dummy */
        ret = ring->wptr & ring->buf_mask;
        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
        return ret;
}
6377
/* Patch the COND_EXEC packet emitted by gfx_v8_0_ring_emit_init_cond_exec():
 * replace the dummy DW at @offset with the number of DWs between the packet
 * and the current write pointer, accounting for ring-buffer wrap-around.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
        unsigned cur;

        BUG_ON(offset > ring->buf_mask);
        BUG_ON(ring->ring[offset] != 0x55aa55aa);

        /* index of the last DW written so far */
        cur = (ring->wptr & ring->buf_mask) - 1;
        if (likely(cur > offset))
                ring->ring[offset] = cur - offset;
        else
                /* wptr wrapped past the end of the ring since the packet */
                ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6391
/* Emit a COPY_DATA packet that reads register @reg and stores its value
 * into the device writeback area at slot @reg_val_offs (dword index).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
                                    uint32_t reg_val_offs)
{
        struct amdgpu_device *adev = ring->adev;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
                                (5 << 8) |      /* dst: memory */
                                (1 << 20));     /* write confirm */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
                                reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
                                reg_val_offs * 4));
}
6408
6409 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6410                                   uint32_t val)
6411 {
6412         uint32_t cmd;
6413
6414         switch (ring->funcs->type) {
6415         case AMDGPU_RING_TYPE_GFX:
6416                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6417                 break;
6418         case AMDGPU_RING_TYPE_KIQ:
6419                 cmd = 1 << 16; /* no inc addr */
6420                 break;
6421         default:
6422                 cmd = WR_CONFIRM;
6423                 break;
6424         }
6425
6426         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6427         amdgpu_ring_write(ring, cmd);
6428         amdgpu_ring_write(ring, reg);
6429         amdgpu_ring_write(ring, 0);
6430         amdgpu_ring_write(ring, val);
6431 }
6432
6433 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6434 {
6435         struct amdgpu_device *adev = ring->adev;
6436         uint32_t value = 0;
6437
6438         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6439         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6440         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6441         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6442         WREG32(mmSQ_CMD, value);
6443 }
6444
/* Enable or disable the gfx ring's EOP (end-of-pipe timestamp) interrupt. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
                                                 enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6451
6452 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6453                                                      int me, int pipe,
6454                                                      enum amdgpu_interrupt_state state)
6455 {
6456         u32 mec_int_cntl, mec_int_cntl_reg;
6457
6458         /*
6459          * amdgpu controls only the first MEC. That's why this function only
6460          * handles the setting of interrupts for this specific MEC. All other
6461          * pipes' interrupts are set by amdkfd.
6462          */
6463
6464         if (me == 1) {
6465                 switch (pipe) {
6466                 case 0:
6467                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6468                         break;
6469                 case 1:
6470                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6471                         break;
6472                 case 2:
6473                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6474                         break;
6475                 case 3:
6476                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6477                         break;
6478                 default:
6479                         DRM_DEBUG("invalid pipe %d\n", pipe);
6480                         return;
6481                 }
6482         } else {
6483                 DRM_DEBUG("invalid me %d\n", me);
6484                 return;
6485         }
6486
6487         switch (state) {
6488         case AMDGPU_IRQ_STATE_DISABLE:
6489                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6490                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6491                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6492                 break;
6493         case AMDGPU_IRQ_STATE_ENABLE:
6494                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6495                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6496                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6497                 break;
6498         default:
6499                 break;
6500         }
6501 }
6502
/* Enable or disable the privileged-register-access fault interrupt.
 * Always returns 0.
 */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
                                             struct amdgpu_irq_src *source,
                                             unsigned type,
                                             enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

        return 0;
}
6513
/* Enable or disable the privileged-instruction fault interrupt.
 * Always returns 0.
 */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
                                              struct amdgpu_irq_src *source,
                                              unsigned type,
                                              enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

        return 0;
}
6524
6525 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6526                                             struct amdgpu_irq_src *src,
6527                                             unsigned type,
6528                                             enum amdgpu_interrupt_state state)
6529 {
6530         switch (type) {
6531         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6532                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6533                 break;
6534         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6535                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6536                 break;
6537         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6538                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6539                 break;
6540         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6541                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6542                 break;
6543         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6544                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6545                 break;
6546         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6547                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6548                 break;
6549         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6550                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6551                 break;
6552         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6553                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6554                 break;
6555         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6556                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6557                 break;
6558         default:
6559                 break;
6560         }
6561         return 0;
6562 }
6563
6564 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6565                                          struct amdgpu_irq_src *source,
6566                                          unsigned int type,
6567                                          enum amdgpu_interrupt_state state)
6568 {
6569         int enable_flag;
6570
6571         switch (state) {
6572         case AMDGPU_IRQ_STATE_DISABLE:
6573                 enable_flag = 0;
6574                 break;
6575
6576         case AMDGPU_IRQ_STATE_ENABLE:
6577                 enable_flag = 1;
6578                 break;
6579
6580         default:
6581                 return -EINVAL;
6582         }
6583
6584         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6585         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6586         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6587         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6588         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6589         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6590                      enable_flag);
6591         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6592                      enable_flag);
6593         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6594                      enable_flag);
6595         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6596                      enable_flag);
6597         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6598                      enable_flag);
6599         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6600                      enable_flag);
6601         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6602                      enable_flag);
6603         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6604                      enable_flag);
6605
6606         return 0;
6607 }
6608
6609 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6610                                      struct amdgpu_irq_src *source,
6611                                      unsigned int type,
6612                                      enum amdgpu_interrupt_state state)
6613 {
6614         int enable_flag;
6615
6616         switch (state) {
6617         case AMDGPU_IRQ_STATE_DISABLE:
6618                 enable_flag = 1;
6619                 break;
6620
6621         case AMDGPU_IRQ_STATE_ENABLE:
6622                 enable_flag = 0;
6623                 break;
6624
6625         default:
6626                 return -EINVAL;
6627         }
6628
6629         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6630                      enable_flag);
6631
6632         return 0;
6633 }
6634
6635 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6636                             struct amdgpu_irq_src *source,
6637                             struct amdgpu_iv_entry *entry)
6638 {
6639         int i;
6640         u8 me_id, pipe_id, queue_id;
6641         struct amdgpu_ring *ring;
6642
6643         DRM_DEBUG("IH: CP EOP\n");
6644         me_id = (entry->ring_id & 0x0c) >> 2;
6645         pipe_id = (entry->ring_id & 0x03) >> 0;
6646         queue_id = (entry->ring_id & 0x70) >> 4;
6647
6648         switch (me_id) {
6649         case 0:
6650                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6651                 break;
6652         case 1:
6653         case 2:
6654                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6655                         ring = &adev->gfx.compute_ring[i];
6656                         /* Per-queue interrupt is supported for MEC starting from VI.
6657                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6658                           */
6659                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6660                                 amdgpu_fence_process(ring);
6661                 }
6662                 break;
6663         }
6664         return 0;
6665 }
6666
6667 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6668                            struct amdgpu_iv_entry *entry)
6669 {
6670         u8 me_id, pipe_id, queue_id;
6671         struct amdgpu_ring *ring;
6672         int i;
6673
6674         me_id = (entry->ring_id & 0x0c) >> 2;
6675         pipe_id = (entry->ring_id & 0x03) >> 0;
6676         queue_id = (entry->ring_id & 0x70) >> 4;
6677
6678         switch (me_id) {
6679         case 0:
6680                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6681                 break;
6682         case 1:
6683         case 2:
6684                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6685                         ring = &adev->gfx.compute_ring[i];
6686                         if (ring->me == me_id && ring->pipe == pipe_id &&
6687                             ring->queue == queue_id)
6688                                 drm_sched_fault(&ring->sched);
6689                 }
6690                 break;
6691         }
6692 }
6693
/* IRQ handler for illegal (privileged) register access from a command
 * stream: log it and fault the offending ring. Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");
        gfx_v8_0_fault(adev, entry);
        return 0;
}
6702
/* IRQ handler for illegal instructions in a command stream: log it and
 * fault the offending ring. Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        gfx_v8_0_fault(adev, entry);
        return 0;
}
6711
/* IRQ handler for CP EDC/ECC errors: log only, no recovery is attempted.
 * Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
                                     struct amdgpu_irq_src *source,
                                     struct amdgpu_iv_entry *entry)
{
        /* terminate the message with '\n' so it doesn't run into the
         * next line in the kernel log (DRM_ERROR does not append one)
         */
        DRM_ERROR("CP EDC/ECC error detected.\n");
        return 0;
}
6719
6720 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6721 {
6722         u32 enc, se_id, sh_id, cu_id;
6723         char type[20];
6724         int sq_edc_source = -1;
6725
6726         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6727         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6728
6729         switch (enc) {
6730                 case 0:
6731                         DRM_INFO("SQ general purpose intr detected:"
6732                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6733                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6734                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6735                                         "wlt %d, thread_trace %d.\n",
6736                                         se_id,
6737                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6738                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6739                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6740                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6741                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6742                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6743                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6744                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6745                                         );
6746                         break;
6747                 case 1:
6748                 case 2:
6749
6750                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6751                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6752
6753                         /*
6754                          * This function can be called either directly from ISR
6755                          * or from BH in which case we can access SQ_EDC_INFO
6756                          * instance
6757                          */
6758                         if (in_task()) {
6759                                 mutex_lock(&adev->grbm_idx_mutex);
6760                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6761
6762                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6763
6764                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6765                                 mutex_unlock(&adev->grbm_idx_mutex);
6766                         }
6767
6768                         if (enc == 1)
6769                                 sprintf(type, "instruction intr");
6770                         else
6771                                 sprintf(type, "EDC/ECC error");
6772
6773                         DRM_INFO(
6774                                 "SQ %s detected: "
6775                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6776                                         "trap %s, sq_ed_info.source %s.\n",
6777                                         type, se_id, sh_id, cu_id,
6778                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6779                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6780                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6781                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6782                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6783                                 );
6784                         break;
6785                 default:
6786                         DRM_ERROR("SQ invalid encoding type\n.");
6787         }
6788 }
6789
6790 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6791 {
6792
6793         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6794         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6795
6796         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6797 }
6798
6799 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6800                            struct amdgpu_irq_src *source,
6801                            struct amdgpu_iv_entry *entry)
6802 {
6803         unsigned ih_data = entry->src_data[0];
6804
6805         /*
6806          * Try to submit work so SQ_EDC_INFO can be accessed from
6807          * BH. If previous work submission hasn't finished yet
6808          * just print whatever info is possible directly from the ISR.
6809          */
6810         if (work_pending(&adev->gfx.sq_work.work)) {
6811                 gfx_v8_0_parse_sq_irq(adev, ih_data);
6812         } else {
6813                 adev->gfx.sq_work.ih_data = ih_data;
6814                 schedule_work(&adev->gfx.sq_work.work);
6815         }
6816
6817         return 0;
6818 }
6819
/* Emit a SURFACE_SYNC packet on the gfx ring to flush/invalidate the
 * TCL1, TC, and SH caches over the full address range.
 */
static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
        amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
                          PACKET3_TC_ACTION_ENA |
                          PACKET3_SH_KCACHE_ACTION_ENA |
                          PACKET3_SH_ICACHE_ACTION_ENA |
                          PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
        amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
        amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
        amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
}
6832
/* Emit an ACQUIRE_MEM packet on a compute ring to flush/invalidate the
 * TCL1, TC, and SH caches over the full address range (the compute
 * counterpart of gfx_v8_0_emit_mem_sync()).
 */
static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
        amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
                          PACKET3_TC_ACTION_ENA |
                          PACKET3_SH_KCACHE_ACTION_ENA |
                          PACKET3_SH_ICACHE_ACTION_ENA |
                          PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
        amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
        amdgpu_ring_write(ring, 0xff);          /* CP_COHER_SIZE_HI */
        amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
        amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
        amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */
}
6847
/* IP-level callbacks for the GFX v8 block (init/fini, suspend/resume,
 * soft reset, clock/power gating), registered with the amdgpu IP
 * dispatch framework.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
        .late_init = gfx_v8_0_late_init,
        .sw_init = gfx_v8_0_sw_init,
        .sw_fini = gfx_v8_0_sw_fini,
        .hw_init = gfx_v8_0_hw_init,
        .hw_fini = gfx_v8_0_hw_fini,
        .suspend = gfx_v8_0_suspend,
        .resume = gfx_v8_0_resume,
        .is_idle = gfx_v8_0_is_idle,
        .wait_for_idle = gfx_v8_0_wait_for_idle,
        .check_soft_reset = gfx_v8_0_check_soft_reset,
        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
        .soft_reset = gfx_v8_0_soft_reset,
        .post_soft_reset = gfx_v8_0_post_soft_reset,
        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
        .set_powergating_state = gfx_v8_0_set_powergating_state,
        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6868
/* Ring callbacks for the GFX (graphics) ring. emit_frame_size is the
 * worst-case DW count reserved per frame; the per-packet breakdown is
 * itemized inline below.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
        .type = AMDGPU_RING_TYPE_GFX,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
        .emit_frame_size = /* maximum 215dw if count 16 IBs in */
                5 +  /* COND_EXEC */
                7 +  /* PIPELINE_SYNC */
                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
                12 +  /* FENCE for VM_FLUSH */
                20 + /* GDS switch */
                4 + /* double SWITCH_BUFFER,
                       the first COND_EXEC jump to the place just
                           prior to this double SWITCH_BUFFER  */
                5 + /* COND_EXEC */
                7 +      /*     HDP_flush */
                4 +      /*     VGT_flush */
                14 + /* CE_META */
                31 + /* DE_META */
                3 + /* CNTX_CTRL */
                5 + /* HDP_INVL */
                12 + 12 + /* FENCE x2 */
                2 + /* SWITCH_BUFFER */
                5, /* SURFACE_SYNC */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_switch_buffer = gfx_v8_ring_emit_sb,
        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
        .soft_recovery = gfx_v8_0_ring_soft_recovery,
        .emit_mem_sync = gfx_v8_0_emit_mem_sync,
};
6915
/*
 * Ring callbacks for the compute (MEC) rings.  Shares get_rptr with the
 * GFX ring but uses the compute-specific wptr accessors and IB/fence
 * emitters.  emit_frame_size addends are the per-packet dword costs.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
		7, /* gfx_v8_0_emit_mem_sync_compute */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
};
6946
/*
 * Ring callbacks for the KIQ (kernel interface queue) ring.  Reuses the
 * compute wptr accessors but has its own fence emitter and a register
 * read callback (emit_rreg); no IB submission callbacks are provided.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6970
6971 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6972 {
6973         int i;
6974
6975         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6976
6977         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6978                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6979
6980         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6981                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6982 }
6983
/* EOP (end-of-pipe fence) interrupt source: enable/disable + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6988
/* Privileged register access fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6993
/* Privileged instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6998
/* CP ECC error interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};
7003
/* SQ (shader sequencer) interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};
7008
/*
 * Register the GFX interrupt source tables with the device.  The EOP
 * source exposes one type per CP interrupt (AMDGPU_CP_IRQ_LAST is the
 * enum count); the fault/error sources each expose a single type.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
7026
/* Install the RLC (run-list controller) callback table for this ASIC. */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7031
/*
 * Initialize GDS (global data share) sizing: GDS size and max compute
 * wave id are read from hardware registers; GWS/OA sizes are fixed.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}
7040
7041 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7042                                                  u32 bitmap)
7043 {
7044         u32 data;
7045
7046         if (!bitmap)
7047                 return;
7048
7049         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7050         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7051
7052         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7053 }
7054
7055 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7056 {
7057         u32 data, mask;
7058
7059         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7060                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7061
7062         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7063
7064         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7065 }
7066
/*
 * Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, and the "ao" CU mask (presumably "always on" —
 * NOTE(review): confirm against amdgpu_cu_info users).  Must iterate
 * under grbm_idx_mutex because gfx_v8_0_select_se_sh() changes the
 * global GRBM register index.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2]; /* one entry per (SE, SH), up to 4 SEs x 2 SHs */
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the per-SH "ao" CU count at 2; dGPUs allow all CUs. */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* Point GRBM at this SE/SH so the reads below see it. */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* Guard matches the disable_masks[4 * 2] dimensions. */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* Count active CUs; the first ao_cu_num go in ao_bitmap. */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* ao_cu_mask is 32-bit: only 2 SEs x 2 SHs (16+8-bit shifts) fit. */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* Restore broadcast (all SEs/SHs) before releasing the mutex. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7122
/* IP block registration entry for GFX v8.0 hardware. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7131
/* IP block registration entry for GFX v8.1 hardware (shares v8.0 funcs). */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7140
7141 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7142 {
7143         uint64_t ce_payload_addr;
7144         int cnt_ce;
7145         union {
7146                 struct vi_ce_ib_state regular;
7147                 struct vi_ce_ib_state_chained_ib chained;
7148         } ce_payload = {};
7149
7150         if (ring->adev->virt.chained_ib_support) {
7151                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7152                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7153                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7154         } else {
7155                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7156                         offsetof(struct vi_gfx_meta_data, ce_payload);
7157                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7158         }
7159
7160         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7161         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7162                                 WRITE_DATA_DST_SEL(8) |
7163                                 WR_CONFIRM) |
7164                                 WRITE_DATA_CACHE_POLICY(0));
7165         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7166         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7167         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7168 }
7169
7170 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7171 {
7172         uint64_t de_payload_addr, gds_addr, csa_addr;
7173         int cnt_de;
7174         union {
7175                 struct vi_de_ib_state regular;
7176                 struct vi_de_ib_state_chained_ib chained;
7177         } de_payload = {};
7178
7179         csa_addr = amdgpu_csa_vaddr(ring->adev);
7180         gds_addr = csa_addr + 4096;
7181         if (ring->adev->virt.chained_ib_support) {
7182                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7183                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7184                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7185                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7186         } else {
7187                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7188                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7189                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7190                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7191         }
7192
7193         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7194         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7195                                 WRITE_DATA_DST_SEL(8) |
7196                                 WR_CONFIRM) |
7197                                 WRITE_DATA_CACHE_POLICY(0));
7198         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7199         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7200         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7201 }