Git Repo - linux.git/blob - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
Merge tag 'nfsd-6.2-5' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
[linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "amdgpu_ring.h"
33 #include "vi.h"
34 #include "vi_structs.h"
35 #include "vid.h"
36 #include "amdgpu_ucode.h"
37 #include "amdgpu_atombios.h"
38 #include "atombios_i2c.h"
39 #include "clearstate_vi.h"
40
41 #include "gmc/gmc_8_2_d.h"
42 #include "gmc/gmc_8_2_sh_mask.h"
43
44 #include "oss/oss_3_0_d.h"
45 #include "oss/oss_3_0_sh_mask.h"
46
47 #include "bif/bif_5_0_d.h"
48 #include "bif/bif_5_0_sh_mask.h"
49 #include "gca/gfx_8_0_d.h"
50 #include "gca/gfx_8_0_enum.h"
51 #include "gca/gfx_8_0_sh_mask.h"
52
53 #include "dce/dce_10_0_d.h"
54 #include "dce/dce_10_0_sh_mask.h"
55
56 #include "smu/smu_7_1_3_d.h"
57
58 #include "ivsrcid/ivsrcid_vislands30.h"
59
/* GFX8 sizing constants. */
60 #define GFX8_NUM_GFX_RINGS     1
61 #define GFX8_MEC_HPD_SIZE 4096
62
/*
 * Per-ASIC GB_ADDR_CONFIG "golden" values. The same constants also appear as
 * the mmGB_ADDR_CONFIG rows of the *_golden_common_all tables in this file
 * (0x22010001 for iceland/cz, 0x22011002 for polaris11, 0x22011003 for tonga).
 */
63 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
64 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
65 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
66 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
67
/*
 * Field-packing helpers: each macro shifts x to the bit position of the named
 * field using the GB_TILE_MODE0 / GB_MACROTILE_MODE0 *__SHIFT constants.
 * The result is not masked to the field width.
 */
68 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
69 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
70 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
71 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
72 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
73 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
74 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
75 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
76 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
77
/*
 * Single-bit masks for RLC_CGTT_MGCG_OVERRIDE, covering bits 0 through 5
 * (CPF, RLC, MGCG, CGCG, CGLS, GRBM in ascending bit order).
 */
78 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
79 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
80 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
81 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
82 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
83 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
84
/* NOTE(review): "CLE" presumably abbreviates "clear" (set = 1, clear = 0). */
85 /* BPM SERDES CMD */
86 #define SET_BPM_SERDES_CMD    1
87 #define CLE_BPM_SERDES_CMD    0
88
/*
 * Anonymous enum numbering the BPM registers consecutively from 0.
 * BPM_REG_FGCG_MAX is the final enumerator, so its value (5) equals the
 * number of register entries that precede it.
 */
89 /* BPM register addresses */
90 enum {
91         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
92         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
93         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
94         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
95         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
96         BPM_REG_FGCG_MAX
97 };
98
/*
 * NOTE(review): presumably the length of the RLC "format direct" register
 * list; its use is outside this view -- confirm against the consumer.
 */
99 #define RLC_FormatDirectRegListLength        14
100
/*
 * Firmware images declared for this module, grouped by ASIC name.
 * Each group lists ce, pfp, me, mec and rlc images; carrizo, tonga, fiji,
 * polaris10/11/12 and vegam additionally list mec2, while stoney and topaz
 * do not. The polaris10/11/12 groups also list "_2" variants of every image
 * except rlc.
 */
101 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
104 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
105 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
106 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
107
108 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
109 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
111 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
112 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
117 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
118 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
120
121 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
122 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
124 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
125 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
130 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
131 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
145
146 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
153 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
155 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
157
158 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
165 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
166 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
167 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
168 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
169
170 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
172 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
173 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
174 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
175 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
176
/*
 * Per-VMID GDS register table: one row for each of VMID 0..15, in order.
 * Every row lists that VMID's BASE, SIZE, GWS and OA register symbols, in
 * that order. The field names of struct amdgpu_gds_reg_offset are declared
 * outside this view.
 */
177 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
178 {
179         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
180         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
181         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
182         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
183         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
184         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
185         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
186         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
187         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
188         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
189         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
190         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
191         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
192         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
193         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
194         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
195 };
196
/*
 * Tonga A11 golden settings: a flat u32 table laid out three values per row.
 * Each row starts with a register offset symbol (mm*) followed by two 32-bit
 * constants.
 * NOTE(review): presumably (register, AND mask, OR value) triplets consumed
 * by a register-sequence programming helper -- confirm at the call site,
 * which is outside this view.
 * NOTE(review): the mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL rows carry third
 * values with bits set outside the second value; whether those bits are
 * applied depends on the consumer.
 */
197 static const u32 golden_settings_tonga_a11[] =
198 {
199         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
200         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
201         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
202         mmGB_GPU_ID, 0x0000000f, 0x00000000,
203         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
204         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
205         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
206         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
207         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
208         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
209         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
210         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
211         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
212         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
213         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
214         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
215 };
216
/*
 * Tonga common golden settings: flat u32 table, three values per row
 * (register offset symbol, then two 32-bit constants). Every row here uses
 * 0xffffffff as its second value.
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 */
217 static const u32 tonga_golden_common_all[] =
218 {
219         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
220         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
221         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
222         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
223         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
224         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
225         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
226         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
227 };
228
/*
 * Tonga MGCG/CGCG init table: flat u32 table, three values per row
 * (register offset symbol, then two 32-bit constants).
 * Layout, in order: the RLC_CGTT_MGCG_OVERRIDE row, a GRBM_GFX_INDEX row,
 * the per-block *CGTT* clock-control rows, a second GRBM_GFX_INDEX row, five
 * CGTS rows for each of CU0..CU7 (SP0, LDS_SQ, TA, SP1, TD_TCP), and four
 * trailing rows. CU0 and CU4 use the *_TA_SQC_CTRL_REG symbol where the
 * other CUs use *_TA_CTRL_REG.
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 */
229 static const u32 tonga_mgcg_cgcg_init[] =
230 {
231         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
232         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
233         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
234         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
236         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
237         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
238         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
239         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
240         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
241         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
242         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
245         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
246         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
247         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
249         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
250         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
251         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
252         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
253         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
254         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
255         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
256         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
257         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
258         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
259         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
260         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
261         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
262         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
263         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
264         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
265         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
266         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
267         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
268         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
269         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
270         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
271         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
272         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
273         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
274         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
275         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
276         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
277         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
278         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
279         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
280         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
281         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
282         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
283         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
284         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
285         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
286         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
287         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
288         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
289         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
290         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
291         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
292         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
293         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
294         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
295         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
296         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
297         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
298         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
299         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
300         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
301         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
302         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
303         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
304         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
305         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
306 };
307
/*
 * VegaM A11 golden settings: flat u32 table, three values per row (register
 * offset symbol, then two 32-bit constants).
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 * NOTE(review): the mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL rows carry third
 * values with bits set outside the second value; whether those bits are
 * applied depends on the consumer.
 */
308 static const u32 golden_settings_vegam_a11[] =
309 {
310         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
311         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
312         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
313         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
314         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
315         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
316         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
317         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
318         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
319         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
320         mmSQ_CONFIG, 0x07f80000, 0x01180000,
321         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
322         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
323         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
324         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
325         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
326         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
327 };
328
/*
 * VegaM common golden settings: flat u32 table, three values per row
 * (register offset symbol, then two 32-bit constants). Every row uses
 * 0xffffffff as its second value. Unlike the tonga/polaris10/fiji common
 * tables in this file, there are no PA_SC_RASTER_CONFIG rows here.
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 */
329 static const u32 vegam_golden_common_all[] =
330 {
331         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
332         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
333         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
334         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
335         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
336         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
337 };
338
/*
 * Polaris11 A11 golden settings: flat u32 table, three values per row
 * (register offset symbol, then two 32-bit constants).
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 * NOTE(review): the mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL rows carry third
 * values with bits set outside the second value; whether those bits are
 * applied depends on the consumer.
 */
339 static const u32 golden_settings_polaris11_a11[] =
340 {
341         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
342         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
343         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
344         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
345         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
346         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
347         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
348         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
349         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
350         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
351         mmSQ_CONFIG, 0x07f80000, 0x01180000,
352         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
353         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
354         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
355         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
356         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
357         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
358 };
359
/*
 * Polaris11 common golden settings: flat u32 table, three values per row
 * (register offset symbol, then two 32-bit constants). Every row uses
 * 0xffffffff as its second value. The mmGB_ADDR_CONFIG value equals
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002).
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 */
360 static const u32 polaris11_golden_common_all[] =
361 {
362         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
363         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
364         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
365         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
366         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
367         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
368 };
369
/*
 * Polaris10 A11 golden settings: flat u32 table, three values per row
 * (register offset symbol, then two 32-bit constants).
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 * NOTE(review): unlike the vegam and polaris11 A11 tables in this file,
 * there is no mmTCP_CHAN_STEER_LO row here; confirm that is intentional.
 * NOTE(review): the mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL rows carry third
 * values with bits set outside the second value; whether those bits are
 * applied depends on the consumer.
 */
370 static const u32 golden_settings_polaris10_a11[] =
371 {
372         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
373         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
374         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
375         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
376         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
377         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
378         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
379         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
380         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
381         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
382         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
383         mmSQ_CONFIG, 0x07f80000, 0x07180000,
384         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
385         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
386         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
387         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
388         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
389 };
390
/*
 * Polaris10 common golden settings: flat u32 table, three values per row
 * (register offset symbol, then two 32-bit constants). Every row uses
 * 0xffffffff as its second value.
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 */
391 static const u32 polaris10_golden_common_all[] =
392 {
393         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
394         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
395         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
396         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
397         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
398         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
399         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
400         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
401 };
402
/*
 * Fiji common golden settings: flat u32 table, three values per row
 * (register offset symbol, then two 32-bit constants). mmGRBM_GFX_INDEX
 * appears twice with the same values; the second occurrence immediately
 * precedes the mmSPI_CONFIG_CNTL_1 row, the only row whose second value is
 * not 0xffffffff.
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 */
403 static const u32 fiji_golden_common_all[] =
404 {
405         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
406         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
407         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
408         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
409         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
410         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
411         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
412         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
413         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
414         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
415 };
416
/*
 * Fiji A10 golden settings: flat u32 table, three values per row (register
 * offset symbol, then two 32-bit constants).
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 * NOTE(review): the mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL rows carry third
 * values with bits set outside the second value; whether those bits are
 * applied depends on the consumer.
 */
417 static const u32 golden_settings_fiji_a10[] =
418 {
419         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
420         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
421         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
422         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
423         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
424         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
425         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
426         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
427         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
428         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
429         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
430 };
431
/*
 * Fiji MGCG/CGCG init table: flat u32 table, three values per row (register
 * offset symbol, then two 32-bit constants).
 * Layout, in order: the RLC_CGTT_MGCG_OVERRIDE row, a GRBM_GFX_INDEX row,
 * the per-block *CGTT* clock-control rows, a second GRBM_GFX_INDEX row, and
 * four trailing rows. Unlike the tonga and iceland tables in this file,
 * there are no per-CU mmCGTS_CUn_* rows.
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 */
432 static const u32 fiji_mgcg_cgcg_init[] =
433 {
434         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
435         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
436         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
441         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
443         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
445         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
455         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
456         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
458         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
459         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
460         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
461         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
462         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
463         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
464         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
465         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
466         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
467         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
468         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
469 };
470
/*
 * Iceland A11 golden settings: flat u32 table, three values per row
 * (register offset symbol, then two 32-bit constants).
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 * NOTE(review): the mmRLC_CGCG_CGLS_CTRL and mmTCC_CTRL rows carry third
 * values with bits set outside the second value; whether those bits are
 * applied depends on the consumer.
 */
471 static const u32 golden_settings_iceland_a11[] =
472 {
473         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
474         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
475         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
476         mmGB_GPU_ID, 0x0000000f, 0x00000000,
477         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
478         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
479         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
480         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
481         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
482         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
483         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
484         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
485         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
486         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
487         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
488         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
489 };
490
/*
 * Iceland common golden settings: flat u32 table, three values per row
 * (register offset symbol, then two 32-bit constants). Every row uses
 * 0xffffffff as its second value. The mmGB_ADDR_CONFIG value equals
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 */
491 static const u32 iceland_golden_common_all[] =
492 {
493         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
494         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
495         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
496         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
497         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
498         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
499         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
500         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
501 };
502
/*
 * Iceland MGCG/CGCG init table: flat u32 table, three values per row
 * (register offset symbol, then two 32-bit constants).
 * Layout, in order: the RLC_CGTT_MGCG_OVERRIDE row, a GRBM_GFX_INDEX row,
 * the per-block *CGTT* clock-control rows, a second GRBM_GFX_INDEX row, five
 * CGTS rows for each of CU0..CU5 (SP0, LDS_SQ, TA, SP1, TD_TCP), and three
 * trailing rows. CU0 and CU4 use the *_TA_SQC_CTRL_REG symbol (value
 * 0x0f840f87) where the other CUs use *_TA_CTRL_REG (value 0x00040007).
 * Unlike the tonga and fiji tables in this file, there is no trailing
 * mmCP_MEM_SLP_CNTL row.
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 */
503 static const u32 iceland_mgcg_cgcg_init[] =
504 {
505         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
506         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
507         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
508         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
509         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
510         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
511         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
512         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
513         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
514         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
515         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
516         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
519         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
520         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
521         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
523         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
524         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
525         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
526         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
527         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
528         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
529         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
530         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
531         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
532         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
533         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
534         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
535         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
536         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
537         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
538         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
539         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
540         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
541         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
542         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
543         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
544         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
545         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
546         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
547         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
548         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
549         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
550         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
551         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
552         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
553         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
554         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
555         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
556         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
557         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
558         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
559         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
560         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
561         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
562         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
563         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
564         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
565         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
566         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
567         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
568         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
569 };
570
/*
 * Carrizo (cz) A11 golden settings: flat u32 table, three values per row
 * (register offset symbol, then two 32-bit constants).
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 * NOTE(review): the mmRLC_CGCG_CGLS_CTRL, mmTCC_CTRL and mmTCP_ADDR_CONFIG
 * (0x0000000f vs 0x000000f3) rows carry third values with bits set outside
 * the second value; whether those bits are applied depends on the consumer.
 */
571 static const u32 cz_golden_settings_a11[] =
572 {
573         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
574         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
575         mmGB_GPU_ID, 0x0000000f, 0x00000000,
576         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
577         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
578         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
579         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
580         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
581         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
582         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
583         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
584         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
585 };
586
/*
 * Carrizo (cz) common golden settings: flat u32 table, three values per row
 * (register offset symbol, then two 32-bit constants). Every row uses
 * 0xffffffff as its second value. The mmGB_ADDR_CONFIG value equals
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 * NOTE(review): presumably (register, AND mask, OR value) triplets --
 * confirm at the call site, which is outside this view.
 */
587 static const u32 cz_golden_common_all[] =
588 {
589         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
590         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
591         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
592         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
593         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
594         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
595         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
596         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
597 };
598
599 static const u32 cz_mgcg_cgcg_init[] =
600 {
601         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
602         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
603         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
606         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
610         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
612         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
615         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
616         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
617         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
619         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
620         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
621         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
622         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
623         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
624         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
625         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
626         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
627         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
628         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
630         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
631         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
632         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
633         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
634         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
635         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
636         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
637         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
638         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
639         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
640         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
641         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
642         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
643         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
644         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
645         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
646         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
647         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
648         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
649         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
650         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
651         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
652         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
653         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
654         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
655         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
656         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
657         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
658         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
659         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
660         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
661         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
662         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
663         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
664         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
665         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
666         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
667         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
668         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
669         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
670         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
671         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
672         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
673         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
674         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
675         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
676 };
677
/*
 * Stoney "golden" register settings.
 *
 * Passed to amdgpu_device_program_register_sequence() from the CHIP_STONEY
 * case of gfx_v8_0_init_golden_registers().  Each row holds three u32
 * values: a register offset followed by two 32-bit words (presumably the
 * mask of bits to update and the value to apply -- confirm against
 * amdgpu_device_program_register_sequence()).
 */
678 static const u32 stoney_golden_settings_a11[] =
679 {
680         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
681         mmGB_GPU_ID, 0x0000000f, 0x00000000,
682         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
683         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
684         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
685         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
686         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
687         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
688         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
689         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
690 };
691
/*
 * Stoney common register settings.
 *
 * Programmed last in the CHIP_STONEY case of
 * gfx_v8_0_init_golden_registers(), after stoney_mgcg_cgcg_init and
 * stoney_golden_settings_a11.  Same three-words-per-row layout as the
 * other tables handed to amdgpu_device_program_register_sequence().
 */
692 static const u32 stoney_golden_common_all[] =
693 {
694         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
695         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
696         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
697         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
698         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
699         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
700         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
701         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
702 };
703
/*
 * Stoney clock-gating related register settings (RLC CGCG/CGLS control,
 * CP/RLC memory sleep control, CGTS SM control).
 *
 * This is the first table programmed in the CHIP_STONEY case of
 * gfx_v8_0_init_golden_registers().  Same three-words-per-row layout as
 * the other tables handed to amdgpu_device_program_register_sequence().
 */
704 static const u32 stoney_mgcg_cgcg_init[] =
705 {
706         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
707         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
708         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
709         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
710         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
711 };
712
713
/*
 * Human-readable descriptions of the SQ EDC error sources, one string per
 * source (seven entries, starting with "no error").
 * NOTE(review): presumably indexed by the SOURCE field read from the SQ EDC
 * info register -- the user of this table is outside this chunk; confirm
 * the index is range-checked there.
 */
714 static const char * const sq_edc_source_names[] = {
715         "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
716         "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
717         "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
718         "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
719         "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
720         "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
721         "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
722 };
723
/* Forward declarations of static helpers that are defined later in this file. */
724 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
725 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
726 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
727 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
728 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
729 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
730 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
731 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
732
/*
 * Mask and shift of the ACLK_DIVIDER field of the SMC register
 * ixCG_ACLK_CNTL.  Used by the CHIP_POLARIS10 case of
 * gfx_v8_0_init_golden_registers() to set the divider to 0x18.
 */
733 #define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
734 #define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L
735
736 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
737 {
738         uint32_t data;
739
740         switch (adev->asic_type) {
741         case CHIP_TOPAZ:
742                 amdgpu_device_program_register_sequence(adev,
743                                                         iceland_mgcg_cgcg_init,
744                                                         ARRAY_SIZE(iceland_mgcg_cgcg_init));
745                 amdgpu_device_program_register_sequence(adev,
746                                                         golden_settings_iceland_a11,
747                                                         ARRAY_SIZE(golden_settings_iceland_a11));
748                 amdgpu_device_program_register_sequence(adev,
749                                                         iceland_golden_common_all,
750                                                         ARRAY_SIZE(iceland_golden_common_all));
751                 break;
752         case CHIP_FIJI:
753                 amdgpu_device_program_register_sequence(adev,
754                                                         fiji_mgcg_cgcg_init,
755                                                         ARRAY_SIZE(fiji_mgcg_cgcg_init));
756                 amdgpu_device_program_register_sequence(adev,
757                                                         golden_settings_fiji_a10,
758                                                         ARRAY_SIZE(golden_settings_fiji_a10));
759                 amdgpu_device_program_register_sequence(adev,
760                                                         fiji_golden_common_all,
761                                                         ARRAY_SIZE(fiji_golden_common_all));
762                 break;
763
764         case CHIP_TONGA:
765                 amdgpu_device_program_register_sequence(adev,
766                                                         tonga_mgcg_cgcg_init,
767                                                         ARRAY_SIZE(tonga_mgcg_cgcg_init));
768                 amdgpu_device_program_register_sequence(adev,
769                                                         golden_settings_tonga_a11,
770                                                         ARRAY_SIZE(golden_settings_tonga_a11));
771                 amdgpu_device_program_register_sequence(adev,
772                                                         tonga_golden_common_all,
773                                                         ARRAY_SIZE(tonga_golden_common_all));
774                 break;
775         case CHIP_VEGAM:
776                 amdgpu_device_program_register_sequence(adev,
777                                                         golden_settings_vegam_a11,
778                                                         ARRAY_SIZE(golden_settings_vegam_a11));
779                 amdgpu_device_program_register_sequence(adev,
780                                                         vegam_golden_common_all,
781                                                         ARRAY_SIZE(vegam_golden_common_all));
782                 break;
783         case CHIP_POLARIS11:
784         case CHIP_POLARIS12:
785                 amdgpu_device_program_register_sequence(adev,
786                                                         golden_settings_polaris11_a11,
787                                                         ARRAY_SIZE(golden_settings_polaris11_a11));
788                 amdgpu_device_program_register_sequence(adev,
789                                                         polaris11_golden_common_all,
790                                                         ARRAY_SIZE(polaris11_golden_common_all));
791                 break;
792         case CHIP_POLARIS10:
793                 amdgpu_device_program_register_sequence(adev,
794                                                         golden_settings_polaris10_a11,
795                                                         ARRAY_SIZE(golden_settings_polaris10_a11));
796                 amdgpu_device_program_register_sequence(adev,
797                                                         polaris10_golden_common_all,
798                                                         ARRAY_SIZE(polaris10_golden_common_all));
799                 data = RREG32_SMC(ixCG_ACLK_CNTL);
800                 data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
801                 data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
802                 WREG32_SMC(ixCG_ACLK_CNTL, data);
803                 if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
804                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
805                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
806                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
807                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
808                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
809                 }
810                 break;
811         case CHIP_CARRIZO:
812                 amdgpu_device_program_register_sequence(adev,
813                                                         cz_mgcg_cgcg_init,
814                                                         ARRAY_SIZE(cz_mgcg_cgcg_init));
815                 amdgpu_device_program_register_sequence(adev,
816                                                         cz_golden_settings_a11,
817                                                         ARRAY_SIZE(cz_golden_settings_a11));
818                 amdgpu_device_program_register_sequence(adev,
819                                                         cz_golden_common_all,
820                                                         ARRAY_SIZE(cz_golden_common_all));
821                 break;
822         case CHIP_STONEY:
823                 amdgpu_device_program_register_sequence(adev,
824                                                         stoney_mgcg_cgcg_init,
825                                                         ARRAY_SIZE(stoney_mgcg_cgcg_init));
826                 amdgpu_device_program_register_sequence(adev,
827                                                         stoney_golden_settings_a11,
828                                                         ARRAY_SIZE(stoney_golden_settings_a11));
829                 amdgpu_device_program_register_sequence(adev,
830                                                         stoney_golden_common_all,
831                                                         ARRAY_SIZE(stoney_golden_common_all));
832                 break;
833         default:
834                 break;
835         }
836 }
837
838 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
839 {
840         struct amdgpu_device *adev = ring->adev;
841         uint32_t tmp = 0;
842         unsigned i;
843         int r;
844
845         WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
846         r = amdgpu_ring_alloc(ring, 3);
847         if (r)
848                 return r;
849
850         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
851         amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
852         amdgpu_ring_write(ring, 0xDEADBEEF);
853         amdgpu_ring_commit(ring);
854
855         for (i = 0; i < adev->usec_timeout; i++) {
856                 tmp = RREG32(mmSCRATCH_REG0);
857                 if (tmp == 0xDEADBEEF)
858                         break;
859                 udelay(1);
860         }
861
862         if (i >= adev->usec_timeout)
863                 r = -ETIMEDOUT;
864
865         return r;
866 }
867
868 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
869 {
870         struct amdgpu_device *adev = ring->adev;
871         struct amdgpu_ib ib;
872         struct dma_fence *f = NULL;
873
874         unsigned int index;
875         uint64_t gpu_addr;
876         uint32_t tmp;
877         long r;
878
879         r = amdgpu_device_wb_get(adev, &index);
880         if (r)
881                 return r;
882
883         gpu_addr = adev->wb.gpu_addr + (index * 4);
884         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
885         memset(&ib, 0, sizeof(ib));
886         r = amdgpu_ib_get(adev, NULL, 16,
887                                         AMDGPU_IB_POOL_DIRECT, &ib);
888         if (r)
889                 goto err1;
890
891         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
892         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
893         ib.ptr[2] = lower_32_bits(gpu_addr);
894         ib.ptr[3] = upper_32_bits(gpu_addr);
895         ib.ptr[4] = 0xDEADBEEF;
896         ib.length_dw = 5;
897
898         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
899         if (r)
900                 goto err2;
901
902         r = dma_fence_wait_timeout(f, false, timeout);
903         if (r == 0) {
904                 r = -ETIMEDOUT;
905                 goto err2;
906         } else if (r < 0) {
907                 goto err2;
908         }
909
910         tmp = adev->wb.wb[index];
911         if (tmp == 0xDEADBEEF)
912                 r = 0;
913         else
914                 r = -EINVAL;
915
916 err2:
917         amdgpu_ib_free(adev, &ib, NULL);
918         dma_fence_put(f);
919 err1:
920         amdgpu_device_wb_free(adev, index);
921         return r;
922 }
923
924
925 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
926 {
927         release_firmware(adev->gfx.pfp_fw);
928         adev->gfx.pfp_fw = NULL;
929         release_firmware(adev->gfx.me_fw);
930         adev->gfx.me_fw = NULL;
931         release_firmware(adev->gfx.ce_fw);
932         adev->gfx.ce_fw = NULL;
933         release_firmware(adev->gfx.rlc_fw);
934         adev->gfx.rlc_fw = NULL;
935         release_firmware(adev->gfx.mec_fw);
936         adev->gfx.mec_fw = NULL;
937         if ((adev->asic_type != CHIP_STONEY) &&
938             (adev->asic_type != CHIP_TOPAZ))
939                 release_firmware(adev->gfx.mec2_fw);
940         adev->gfx.mec2_fw = NULL;
941
942         kfree(adev->gfx.rlc.register_list_format);
943 }
944
945 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
946 {
947         const char *chip_name;
948         char fw_name[30];
949         int err;
950         struct amdgpu_firmware_info *info = NULL;
951         const struct common_firmware_header *header = NULL;
952         const struct gfx_firmware_header_v1_0 *cp_hdr;
953         const struct rlc_firmware_header_v2_0 *rlc_hdr;
954         unsigned int *tmp = NULL, i;
955
956         DRM_DEBUG("\n");
957
958         switch (adev->asic_type) {
959         case CHIP_TOPAZ:
960                 chip_name = "topaz";
961                 break;
962         case CHIP_TONGA:
963                 chip_name = "tonga";
964                 break;
965         case CHIP_CARRIZO:
966                 chip_name = "carrizo";
967                 break;
968         case CHIP_FIJI:
969                 chip_name = "fiji";
970                 break;
971         case CHIP_STONEY:
972                 chip_name = "stoney";
973                 break;
974         case CHIP_POLARIS10:
975                 chip_name = "polaris10";
976                 break;
977         case CHIP_POLARIS11:
978                 chip_name = "polaris11";
979                 break;
980         case CHIP_POLARIS12:
981                 chip_name = "polaris12";
982                 break;
983         case CHIP_VEGAM:
984                 chip_name = "vegam";
985                 break;
986         default:
987                 BUG();
988         }
989
990         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
991                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
992                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
993                 if (err == -ENOENT) {
994                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
995                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
996                 }
997         } else {
998                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
999                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1000         }
1001         if (err)
1002                 goto out;
1003         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1004         if (err)
1005                 goto out;
1006         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1007         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1008         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1009
1010         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1011                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1012                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1013                 if (err == -ENOENT) {
1014                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1015                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1016                 }
1017         } else {
1018                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1019                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1020         }
1021         if (err)
1022                 goto out;
1023         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1024         if (err)
1025                 goto out;
1026         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1027         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1028
1029         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1030
1031         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1032                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1033                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1034                 if (err == -ENOENT) {
1035                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1036                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1037                 }
1038         } else {
1039                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1040                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1041         }
1042         if (err)
1043                 goto out;
1044         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1045         if (err)
1046                 goto out;
1047         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1048         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1049         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1050
1051         /*
1052          * Support for MCBP/Virtualization in combination with chained IBs is
1053          * formal released on feature version #46
1054          */
1055         if (adev->gfx.ce_feature_version >= 46 &&
1056             adev->gfx.pfp_feature_version >= 46) {
1057                 adev->virt.chained_ib_support = true;
1058                 DRM_INFO("Chained IB support enabled!\n");
1059         } else
1060                 adev->virt.chained_ib_support = false;
1061
1062         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1063         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1064         if (err)
1065                 goto out;
1066         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1067         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1068         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1069         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1070
1071         adev->gfx.rlc.save_and_restore_offset =
1072                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1073         adev->gfx.rlc.clear_state_descriptor_offset =
1074                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1075         adev->gfx.rlc.avail_scratch_ram_locations =
1076                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1077         adev->gfx.rlc.reg_restore_list_size =
1078                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1079         adev->gfx.rlc.reg_list_format_start =
1080                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1081         adev->gfx.rlc.reg_list_format_separate_start =
1082                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1083         adev->gfx.rlc.starting_offsets_start =
1084                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1085         adev->gfx.rlc.reg_list_format_size_bytes =
1086                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1087         adev->gfx.rlc.reg_list_size_bytes =
1088                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1089
1090         adev->gfx.rlc.register_list_format =
1091                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1092                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1093
1094         if (!adev->gfx.rlc.register_list_format) {
1095                 err = -ENOMEM;
1096                 goto out;
1097         }
1098
1099         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1100                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1101         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1102                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1103
1104         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1105
1106         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1107                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1108         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1109                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1110
1111         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1112                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1113                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1114                 if (err == -ENOENT) {
1115                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1116                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1117                 }
1118         } else {
1119                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1120                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1121         }
1122         if (err)
1123                 goto out;
1124         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1125         if (err)
1126                 goto out;
1127         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1128         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1129         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1130
1131         if ((adev->asic_type != CHIP_STONEY) &&
1132             (adev->asic_type != CHIP_TOPAZ)) {
1133                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1134                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1135                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1136                         if (err == -ENOENT) {
1137                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1138                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1139                         }
1140                 } else {
1141                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1142                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1143                 }
1144                 if (!err) {
1145                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1146                         if (err)
1147                                 goto out;
1148                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1149                                 adev->gfx.mec2_fw->data;
1150                         adev->gfx.mec2_fw_version =
1151                                 le32_to_cpu(cp_hdr->header.ucode_version);
1152                         adev->gfx.mec2_feature_version =
1153                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1154                 } else {
1155                         err = 0;
1156                         adev->gfx.mec2_fw = NULL;
1157                 }
1158         }
1159
1160         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1161         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1162         info->fw = adev->gfx.pfp_fw;
1163         header = (const struct common_firmware_header *)info->fw->data;
1164         adev->firmware.fw_size +=
1165                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1166
1167         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1168         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1169         info->fw = adev->gfx.me_fw;
1170         header = (const struct common_firmware_header *)info->fw->data;
1171         adev->firmware.fw_size +=
1172                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1173
1174         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1175         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1176         info->fw = adev->gfx.ce_fw;
1177         header = (const struct common_firmware_header *)info->fw->data;
1178         adev->firmware.fw_size +=
1179                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1180
1181         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1182         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1183         info->fw = adev->gfx.rlc_fw;
1184         header = (const struct common_firmware_header *)info->fw->data;
1185         adev->firmware.fw_size +=
1186                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1187
1188         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1189         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1190         info->fw = adev->gfx.mec_fw;
1191         header = (const struct common_firmware_header *)info->fw->data;
1192         adev->firmware.fw_size +=
1193                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1194
1195         /* we need account JT in */
1196         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1197         adev->firmware.fw_size +=
1198                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1199
1200         if (amdgpu_sriov_vf(adev)) {
1201                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1202                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1203                 info->fw = adev->gfx.mec_fw;
1204                 adev->firmware.fw_size +=
1205                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1206         }
1207
1208         if (adev->gfx.mec2_fw) {
1209                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1210                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1211                 info->fw = adev->gfx.mec2_fw;
1212                 header = (const struct common_firmware_header *)info->fw->data;
1213                 adev->firmware.fw_size +=
1214                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1215         }
1216
1217 out:
1218         if (err) {
1219                 dev_err(adev->dev,
1220                         "gfx8: Failed to load firmware \"%s\"\n",
1221                         fw_name);
1222                 release_firmware(adev->gfx.pfp_fw);
1223                 adev->gfx.pfp_fw = NULL;
1224                 release_firmware(adev->gfx.me_fw);
1225                 adev->gfx.me_fw = NULL;
1226                 release_firmware(adev->gfx.ce_fw);
1227                 adev->gfx.ce_fw = NULL;
1228                 release_firmware(adev->gfx.rlc_fw);
1229                 adev->gfx.rlc_fw = NULL;
1230                 release_firmware(adev->gfx.mec_fw);
1231                 adev->gfx.mec_fw = NULL;
1232                 release_firmware(adev->gfx.mec2_fw);
1233                 adev->gfx.mec2_fw = NULL;
1234         }
1235         return err;
1236 }
1237
1238 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1239                                     volatile u32 *buffer)
1240 {
1241         u32 count = 0, i;
1242         const struct cs_section_def *sect = NULL;
1243         const struct cs_extent_def *ext = NULL;
1244
1245         if (adev->gfx.rlc.cs_data == NULL)
1246                 return;
1247         if (buffer == NULL)
1248                 return;
1249
1250         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1251         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1252
1253         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1254         buffer[count++] = cpu_to_le32(0x80000000);
1255         buffer[count++] = cpu_to_le32(0x80000000);
1256
1257         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1258                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1259                         if (sect->id == SECT_CONTEXT) {
1260                                 buffer[count++] =
1261                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1262                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1263                                                 PACKET3_SET_CONTEXT_REG_START);
1264                                 for (i = 0; i < ext->reg_count; i++)
1265                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1266                         } else {
1267                                 return;
1268                         }
1269                 }
1270         }
1271
1272         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1273         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1274                         PACKET3_SET_CONTEXT_REG_START);
1275         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1276         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1277
1278         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1279         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1280
1281         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1282         buffer[count++] = cpu_to_le32(0);
1283 }
1284
1285 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1286 {
1287         if (adev->asic_type == CHIP_CARRIZO)
1288                 return 5;
1289         else
1290                 return 4;
1291 }
1292
1293 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1294 {
1295         const struct cs_section_def *cs_data;
1296         int r;
1297
1298         adev->gfx.rlc.cs_data = vi_cs_data;
1299
1300         cs_data = adev->gfx.rlc.cs_data;
1301
1302         if (cs_data) {
1303                 /* init clear state block */
1304                 r = amdgpu_gfx_rlc_init_csb(adev);
1305                 if (r)
1306                         return r;
1307         }
1308
1309         if ((adev->asic_type == CHIP_CARRIZO) ||
1310             (adev->asic_type == CHIP_STONEY)) {
1311                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1312                 r = amdgpu_gfx_rlc_init_cpt(adev);
1313                 if (r)
1314                         return r;
1315         }
1316
1317         /* init spm vmid with 0xf */
1318         if (adev->gfx.rlc.funcs->update_spm_vmid)
1319                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1320
1321         return 0;
1322 }
1323
1324 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1325 {
1326         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1327 }
1328
1329 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1330 {
1331         int r;
1332         u32 *hpd;
1333         size_t mec_hpd_size;
1334
1335         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1336
1337         /* take ownership of the relevant compute queues */
1338         amdgpu_gfx_compute_queue_acquire(adev);
1339
1340         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1341         if (mec_hpd_size) {
1342                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1343                                               AMDGPU_GEM_DOMAIN_VRAM,
1344                                               &adev->gfx.mec.hpd_eop_obj,
1345                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1346                                               (void **)&hpd);
1347                 if (r) {
1348                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1349                         return r;
1350                 }
1351
1352                 memset(hpd, 0, mec_hpd_size);
1353
1354                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1355                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1356         }
1357
1358         return 0;
1359 }
1360
/*
 * Shader words copied verbatim into the indirect buffer by
 * gfx_v8_0_do_edc_gpr_workarounds(); the address of that copy is
 * programmed into mmCOMPUTE_PGM_LO/HI for the "VGPR" dispatch.
 * NOTE(review): judging by the name this program initializes the VGPRs;
 * the instruction encoding is not documented in this file.
 */
1361 static const u32 vgpr_init_compute_shader[] =
1362 {
1363         0x7e000209, 0x7e020208,
1364         0x7e040207, 0x7e060206,
1365         0x7e080205, 0x7e0a0204,
1366         0x7e0c0203, 0x7e0e0202,
1367         0x7e100201, 0x7e120200,
1368         0x7e140209, 0x7e160208,
1369         0x7e180207, 0x7e1a0206,
1370         0x7e1c0205, 0x7e1e0204,
1371         0x7e200203, 0x7e220202,
1372         0x7e240201, 0x7e260200,
1373         0x7e280209, 0x7e2a0208,
1374         0x7e2c0207, 0x7e2e0206,
1375         0x7e300205, 0x7e320204,
1376         0x7e340203, 0x7e360202,
1377         0x7e380201, 0x7e3a0200,
1378         0x7e3c0209, 0x7e3e0208,
1379         0x7e400207, 0x7e420206,
1380         0x7e440205, 0x7e460204,
1381         0x7e480203, 0x7e4a0202,
1382         0x7e4c0201, 0x7e4e0200,
1383         0x7e500209, 0x7e520208,
1384         0x7e540207, 0x7e560206,
1385         0x7e580205, 0x7e5a0204,
1386         0x7e5c0203, 0x7e5e0202,
1387         0x7e600201, 0x7e620200,
1388         0x7e640209, 0x7e660208,
1389         0x7e680207, 0x7e6a0206,
1390         0x7e6c0205, 0x7e6e0204,
1391         0x7e700203, 0x7e720202,
1392         0x7e740201, 0x7e760200,
1393         0x7e780209, 0x7e7a0208,
1394         0x7e7c0207, 0x7e7e0206,
1395         0xbf8a0000, 0xbf810000,
1396 };
1397
/*
 * Shader words copied verbatim into the indirect buffer by
 * gfx_v8_0_do_edc_gpr_workarounds(); the same copy is used as the
 * program (mmCOMPUTE_PGM_LO/HI) for both the "SGPR1" and the "SGPR2"
 * dispatch.
 * NOTE(review): judging by the name this program initializes the SGPRs;
 * the instruction encoding is not documented in this file.
 */
1398 static const u32 sgpr_init_compute_shader[] =
1399 {
1400         0xbe8a0100, 0xbe8c0102,
1401         0xbe8e0104, 0xbe900106,
1402         0xbe920108, 0xbe940100,
1403         0xbe960102, 0xbe980104,
1404         0xbe9a0106, 0xbe9c0108,
1405         0xbe9e0100, 0xbea00102,
1406         0xbea20104, 0xbea40106,
1407         0xbea60108, 0xbea80100,
1408         0xbeaa0102, 0xbeac0104,
1409         0xbeae0106, 0xbeb00108,
1410         0xbeb20100, 0xbeb40102,
1411         0xbeb60104, 0xbeb80106,
1412         0xbeba0108, 0xbebc0100,
1413         0xbebe0102, 0xbec00104,
1414         0xbec20106, 0xbec40108,
1415         0xbec60100, 0xbec80102,
1416         0xbee60004, 0xbee70005,
1417         0xbeea0006, 0xbeeb0007,
1418         0xbee80008, 0xbee90009,
1419         0xbefc0000, 0xbf8a0000,
1420         0xbf810000, 0x00000000,
1421 };
1422
/*
 * Register state for the "VGPR" dispatch issued by
 * gfx_v8_0_do_edc_gpr_workarounds(). The array is a flat list of
 * (register, value) pairs; the consumer walks it two entries at a time
 * and emits one SET_SH_REG packet per pair.
 */
1423 static const u32 vgpr_init_regs[] =
1424 {
1425         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1426         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1427         mmCOMPUTE_NUM_THREAD_X, 256*4,
1428         mmCOMPUTE_NUM_THREAD_Y, 1,
1429         mmCOMPUTE_NUM_THREAD_Z, 1,
1430         mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1431         mmCOMPUTE_PGM_RSRC2, 20,
1432         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1433         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1434         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1435         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1436         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1437         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1438         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1439         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1440         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1441         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1442 };
1443
/*
 * Register state for the "SGPR1" dispatch issued by
 * gfx_v8_0_do_edc_gpr_workarounds(), as a flat list of (register, value)
 * pairs. Identical to sgpr2_init_regs except for the
 * mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value (0x0f here, 0xf0 there).
 */
1444 static const u32 sgpr1_init_regs[] =
1445 {
1446         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1447         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1448         mmCOMPUTE_NUM_THREAD_X, 256*5,
1449         mmCOMPUTE_NUM_THREAD_Y, 1,
1450         mmCOMPUTE_NUM_THREAD_Z, 1,
1451         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1452         mmCOMPUTE_PGM_RSRC2, 20,
1453         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1454         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1455         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1456         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1457         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1458         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1459         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1460         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1461         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1462         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1463 };
1464
/*
 * Register state for the "SGPR2" dispatch issued by
 * gfx_v8_0_do_edc_gpr_workarounds(), as a flat list of (register, value)
 * pairs. Identical to sgpr1_init_regs except for the
 * mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value (0xf0 here, 0x0f there).
 */
1465 static const u32 sgpr2_init_regs[] =
1466 {
1467         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1468         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1469         mmCOMPUTE_NUM_THREAD_X, 256*5,
1470         mmCOMPUTE_NUM_THREAD_Y, 1,
1471         mmCOMPUTE_NUM_THREAD_Z, 1,
1472         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1473         mmCOMPUTE_PGM_RSRC2, 20,
1474         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1475         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1476         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1477         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1478         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1479         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1480         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1481         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1482         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1483         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1484 };
1485
/*
 * EDC counter registers. gfx_v8_0_do_edc_gpr_workarounds() reads each of
 * them once after re-enabling EDC and discards the value; per the comment
 * at that call site the read-back is what clears the counters.
 */
1486 static const u32 sec_ded_counter_registers[] =
1487 {
1488         mmCPC_EDC_ATC_CNT,
1489         mmCPC_EDC_SCRATCH_CNT,
1490         mmCPC_EDC_UCODE_CNT,
1491         mmCPF_EDC_ATC_CNT,
1492         mmCPF_EDC_ROQ_CNT,
1493         mmCPF_EDC_TAG_CNT,
1494         mmCPG_EDC_ATC_CNT,
1495         mmCPG_EDC_DMA_CNT,
1496         mmCPG_EDC_TAG_CNT,
1497         mmDC_EDC_CSINVOC_CNT,
1498         mmDC_EDC_RESTORE_CNT,
1499         mmDC_EDC_STATE_CNT,
1500         mmGDS_EDC_CNT,
1501         mmGDS_EDC_GRBM_CNT,
1502         mmGDS_EDC_OA_DED,
1503         mmSPI_EDC_CNT,
1504         mmSQC_ATC_EDC_GATCL1_CNT,
1505         mmSQC_EDC_CNT,
1506         mmSQ_EDC_DED_CNT,
1507         mmSQ_EDC_INFO,
1508         mmSQ_EDC_SEC_CNT,
1509         mmTCC_EDC_CNT,
1510         mmTCP_ATC_EDC_GATCL1_CNT,
1511         mmTCP_EDC_CNT,
1512         mmTD_EDC_CNT
1513 };
1514
1515 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1516 {
1517         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1518         struct amdgpu_ib ib;
1519         struct dma_fence *f = NULL;
1520         int r, i;
1521         u32 tmp;
1522         unsigned total_size, vgpr_offset, sgpr_offset;
1523         u64 gpu_addr;
1524
1525         /* only supported on CZ */
1526         if (adev->asic_type != CHIP_CARRIZO)
1527                 return 0;
1528
1529         /* bail if the compute ring is not ready */
1530         if (!ring->sched.ready)
1531                 return 0;
1532
1533         tmp = RREG32(mmGB_EDC_MODE);
1534         WREG32(mmGB_EDC_MODE, 0);
1535
1536         total_size =
1537                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1538         total_size +=
1539                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1540         total_size +=
1541                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1542         total_size = ALIGN(total_size, 256);
1543         vgpr_offset = total_size;
1544         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1545         sgpr_offset = total_size;
1546         total_size += sizeof(sgpr_init_compute_shader);
1547
1548         /* allocate an indirect buffer to put the commands in */
1549         memset(&ib, 0, sizeof(ib));
1550         r = amdgpu_ib_get(adev, NULL, total_size,
1551                                         AMDGPU_IB_POOL_DIRECT, &ib);
1552         if (r) {
1553                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1554                 return r;
1555         }
1556
1557         /* load the compute shaders */
1558         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1559                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1560
1561         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1562                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1563
1564         /* init the ib length to 0 */
1565         ib.length_dw = 0;
1566
1567         /* VGPR */
1568         /* write the register state for the compute dispatch */
1569         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1570                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1571                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1572                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1573         }
1574         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1575         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1576         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1577         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1578         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1579         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1580
1581         /* write dispatch packet */
1582         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1583         ib.ptr[ib.length_dw++] = 8; /* x */
1584         ib.ptr[ib.length_dw++] = 1; /* y */
1585         ib.ptr[ib.length_dw++] = 1; /* z */
1586         ib.ptr[ib.length_dw++] =
1587                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1588
1589         /* write CS partial flush packet */
1590         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1591         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1592
1593         /* SGPR1 */
1594         /* write the register state for the compute dispatch */
1595         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1596                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1597                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1598                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1599         }
1600         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1601         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1602         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1603         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1604         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1605         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1606
1607         /* write dispatch packet */
1608         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1609         ib.ptr[ib.length_dw++] = 8; /* x */
1610         ib.ptr[ib.length_dw++] = 1; /* y */
1611         ib.ptr[ib.length_dw++] = 1; /* z */
1612         ib.ptr[ib.length_dw++] =
1613                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1614
1615         /* write CS partial flush packet */
1616         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1617         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1618
1619         /* SGPR2 */
1620         /* write the register state for the compute dispatch */
1621         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1622                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1623                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1624                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1625         }
1626         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1627         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1628         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1629         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1630         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1631         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1632
1633         /* write dispatch packet */
1634         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1635         ib.ptr[ib.length_dw++] = 8; /* x */
1636         ib.ptr[ib.length_dw++] = 1; /* y */
1637         ib.ptr[ib.length_dw++] = 1; /* z */
1638         ib.ptr[ib.length_dw++] =
1639                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1640
1641         /* write CS partial flush packet */
1642         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1643         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1644
1645         /* shedule the ib on the ring */
1646         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1647         if (r) {
1648                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1649                 goto fail;
1650         }
1651
1652         /* wait for the GPU to finish processing the IB */
1653         r = dma_fence_wait(f, false);
1654         if (r) {
1655                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1656                 goto fail;
1657         }
1658
1659         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1660         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1661         WREG32(mmGB_EDC_MODE, tmp);
1662
1663         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1664         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1665         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1666
1667
1668         /* read back registers to clear the counters */
1669         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1670                 RREG32(sec_ded_counter_registers[i]);
1671
1672 fail:
1673         amdgpu_ib_free(adev, &ib, NULL);
1674         dma_fence_put(f);
1675
1676         return r;
1677 }
1678
1679 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1680 {
1681         u32 gb_addr_config;
1682         u32 mc_arb_ramcfg;
1683         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1684         u32 tmp;
1685         int ret;
1686
1687         switch (adev->asic_type) {
1688         case CHIP_TOPAZ:
1689                 adev->gfx.config.max_shader_engines = 1;
1690                 adev->gfx.config.max_tile_pipes = 2;
1691                 adev->gfx.config.max_cu_per_sh = 6;
1692                 adev->gfx.config.max_sh_per_se = 1;
1693                 adev->gfx.config.max_backends_per_se = 2;
1694                 adev->gfx.config.max_texture_channel_caches = 2;
1695                 adev->gfx.config.max_gprs = 256;
1696                 adev->gfx.config.max_gs_threads = 32;
1697                 adev->gfx.config.max_hw_contexts = 8;
1698
1699                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1700                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1701                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1702                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1703                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1704                 break;
1705         case CHIP_FIJI:
1706                 adev->gfx.config.max_shader_engines = 4;
1707                 adev->gfx.config.max_tile_pipes = 16;
1708                 adev->gfx.config.max_cu_per_sh = 16;
1709                 adev->gfx.config.max_sh_per_se = 1;
1710                 adev->gfx.config.max_backends_per_se = 4;
1711                 adev->gfx.config.max_texture_channel_caches = 16;
1712                 adev->gfx.config.max_gprs = 256;
1713                 adev->gfx.config.max_gs_threads = 32;
1714                 adev->gfx.config.max_hw_contexts = 8;
1715
1716                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1717                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1718                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1719                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1720                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1721                 break;
1722         case CHIP_POLARIS11:
1723         case CHIP_POLARIS12:
1724                 ret = amdgpu_atombios_get_gfx_info(adev);
1725                 if (ret)
1726                         return ret;
1727                 adev->gfx.config.max_gprs = 256;
1728                 adev->gfx.config.max_gs_threads = 32;
1729                 adev->gfx.config.max_hw_contexts = 8;
1730
1731                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1732                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1733                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1734                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1735                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1736                 break;
1737         case CHIP_POLARIS10:
1738         case CHIP_VEGAM:
1739                 ret = amdgpu_atombios_get_gfx_info(adev);
1740                 if (ret)
1741                         return ret;
1742                 adev->gfx.config.max_gprs = 256;
1743                 adev->gfx.config.max_gs_threads = 32;
1744                 adev->gfx.config.max_hw_contexts = 8;
1745
1746                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1747                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1748                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1749                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1750                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1751                 break;
1752         case CHIP_TONGA:
1753                 adev->gfx.config.max_shader_engines = 4;
1754                 adev->gfx.config.max_tile_pipes = 8;
1755                 adev->gfx.config.max_cu_per_sh = 8;
1756                 adev->gfx.config.max_sh_per_se = 1;
1757                 adev->gfx.config.max_backends_per_se = 2;
1758                 adev->gfx.config.max_texture_channel_caches = 8;
1759                 adev->gfx.config.max_gprs = 256;
1760                 adev->gfx.config.max_gs_threads = 32;
1761                 adev->gfx.config.max_hw_contexts = 8;
1762
1763                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1764                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1765                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1766                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1767                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1768                 break;
1769         case CHIP_CARRIZO:
1770                 adev->gfx.config.max_shader_engines = 1;
1771                 adev->gfx.config.max_tile_pipes = 2;
1772                 adev->gfx.config.max_sh_per_se = 1;
1773                 adev->gfx.config.max_backends_per_se = 2;
1774                 adev->gfx.config.max_cu_per_sh = 8;
1775                 adev->gfx.config.max_texture_channel_caches = 2;
1776                 adev->gfx.config.max_gprs = 256;
1777                 adev->gfx.config.max_gs_threads = 32;
1778                 adev->gfx.config.max_hw_contexts = 8;
1779
1780                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1781                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1782                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1783                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1784                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1785                 break;
1786         case CHIP_STONEY:
1787                 adev->gfx.config.max_shader_engines = 1;
1788                 adev->gfx.config.max_tile_pipes = 2;
1789                 adev->gfx.config.max_sh_per_se = 1;
1790                 adev->gfx.config.max_backends_per_se = 1;
1791                 adev->gfx.config.max_cu_per_sh = 3;
1792                 adev->gfx.config.max_texture_channel_caches = 2;
1793                 adev->gfx.config.max_gprs = 256;
1794                 adev->gfx.config.max_gs_threads = 16;
1795                 adev->gfx.config.max_hw_contexts = 8;
1796
1797                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1798                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1799                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1800                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1801                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1802                 break;
1803         default:
1804                 adev->gfx.config.max_shader_engines = 2;
1805                 adev->gfx.config.max_tile_pipes = 4;
1806                 adev->gfx.config.max_cu_per_sh = 2;
1807                 adev->gfx.config.max_sh_per_se = 1;
1808                 adev->gfx.config.max_backends_per_se = 2;
1809                 adev->gfx.config.max_texture_channel_caches = 4;
1810                 adev->gfx.config.max_gprs = 256;
1811                 adev->gfx.config.max_gs_threads = 32;
1812                 adev->gfx.config.max_hw_contexts = 8;
1813
1814                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1815                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1816                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1817                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1818                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1819                 break;
1820         }
1821
1822         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1823         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1824
1825         adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1826                                 MC_ARB_RAMCFG, NOOFBANK);
1827         adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1828                                 MC_ARB_RAMCFG, NOOFRANKS);
1829
1830         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1831         adev->gfx.config.mem_max_burst_length_bytes = 256;
1832         if (adev->flags & AMD_IS_APU) {
1833                 /* Get memory bank mapping mode. */
1834                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1835                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1836                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1837
1838                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1839                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1840                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1841
1842                 /* Validate settings in case only one DIMM installed. */
1843                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1844                         dimm00_addr_map = 0;
1845                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1846                         dimm01_addr_map = 0;
1847                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1848                         dimm10_addr_map = 0;
1849                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1850                         dimm11_addr_map = 0;
1851
1852                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1853                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1854                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1855                         adev->gfx.config.mem_row_size_in_kb = 2;
1856                 else
1857                         adev->gfx.config.mem_row_size_in_kb = 1;
1858         } else {
1859                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1860                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1861                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1862                         adev->gfx.config.mem_row_size_in_kb = 4;
1863         }
1864
1865         adev->gfx.config.shader_engine_tile_size = 32;
1866         adev->gfx.config.num_gpus = 1;
1867         adev->gfx.config.multi_gpu_tile_size = 64;
1868
1869         /* fix up row size */
1870         switch (adev->gfx.config.mem_row_size_in_kb) {
1871         case 1:
1872         default:
1873                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1874                 break;
1875         case 2:
1876                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1877                 break;
1878         case 4:
1879                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1880                 break;
1881         }
1882         adev->gfx.config.gb_addr_config = gb_addr_config;
1883
1884         return 0;
1885 }
1886
1887 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1888                                         int mec, int pipe, int queue)
1889 {
1890         int r;
1891         unsigned irq_type;
1892         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1893         unsigned int hw_prio;
1894
1895         ring = &adev->gfx.compute_ring[ring_id];
1896
1897         /* mec0 is me1 */
1898         ring->me = mec + 1;
1899         ring->pipe = pipe;
1900         ring->queue = queue;
1901
1902         ring->ring_obj = NULL;
1903         ring->use_doorbell = true;
1904         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1905         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1906                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1907         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1908
1909         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1910                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1911                 + ring->pipe;
1912
1913         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1914                         AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1915         /* type-2 packets are deprecated on MEC, use type-3 instead */
1916         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1917                              hw_prio, NULL);
1918         if (r)
1919                 return r;
1920
1921
1922         return 0;
1923 }
1924
1925 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1926
1927 static int gfx_v8_0_sw_init(void *handle)
1928 {
1929         int i, j, k, r, ring_id;
1930         struct amdgpu_ring *ring;
1931         struct amdgpu_kiq *kiq;
1932         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1933
1934         switch (adev->asic_type) {
1935         case CHIP_TONGA:
1936         case CHIP_CARRIZO:
1937         case CHIP_FIJI:
1938         case CHIP_POLARIS10:
1939         case CHIP_POLARIS11:
1940         case CHIP_POLARIS12:
1941         case CHIP_VEGAM:
1942                 adev->gfx.mec.num_mec = 2;
1943                 break;
1944         case CHIP_TOPAZ:
1945         case CHIP_STONEY:
1946         default:
1947                 adev->gfx.mec.num_mec = 1;
1948                 break;
1949         }
1950
1951         adev->gfx.mec.num_pipe_per_mec = 4;
1952         adev->gfx.mec.num_queue_per_pipe = 8;
1953
1954         /* EOP Event */
1955         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1956         if (r)
1957                 return r;
1958
1959         /* Privileged reg */
1960         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1961                               &adev->gfx.priv_reg_irq);
1962         if (r)
1963                 return r;
1964
1965         /* Privileged inst */
1966         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1967                               &adev->gfx.priv_inst_irq);
1968         if (r)
1969                 return r;
1970
1971         /* Add CP EDC/ECC irq  */
1972         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1973                               &adev->gfx.cp_ecc_error_irq);
1974         if (r)
1975                 return r;
1976
1977         /* SQ interrupts. */
1978         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1979                               &adev->gfx.sq_irq);
1980         if (r) {
1981                 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1982                 return r;
1983         }
1984
1985         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1986
1987         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1988
1989         r = gfx_v8_0_init_microcode(adev);
1990         if (r) {
1991                 DRM_ERROR("Failed to load gfx firmware!\n");
1992                 return r;
1993         }
1994
1995         r = adev->gfx.rlc.funcs->init(adev);
1996         if (r) {
1997                 DRM_ERROR("Failed to init rlc BOs!\n");
1998                 return r;
1999         }
2000
2001         r = gfx_v8_0_mec_init(adev);
2002         if (r) {
2003                 DRM_ERROR("Failed to init MEC BOs!\n");
2004                 return r;
2005         }
2006
2007         /* set up the gfx ring */
2008         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2009                 ring = &adev->gfx.gfx_ring[i];
2010                 ring->ring_obj = NULL;
2011                 sprintf(ring->name, "gfx");
2012                 /* no gfx doorbells on iceland */
2013                 if (adev->asic_type != CHIP_TOPAZ) {
2014                         ring->use_doorbell = true;
2015                         ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2016                 }
2017
2018                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2019                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2020                                      AMDGPU_RING_PRIO_DEFAULT, NULL);
2021                 if (r)
2022                         return r;
2023         }
2024
2025
2026         /* set up the compute queues - allocate horizontally across pipes */
2027         ring_id = 0;
2028         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2029                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2030                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2031                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2032                                         continue;
2033
2034                                 r = gfx_v8_0_compute_ring_init(adev,
2035                                                                 ring_id,
2036                                                                 i, k, j);
2037                                 if (r)
2038                                         return r;
2039
2040                                 ring_id++;
2041                         }
2042                 }
2043         }
2044
2045         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2046         if (r) {
2047                 DRM_ERROR("Failed to init KIQ BOs!\n");
2048                 return r;
2049         }
2050
2051         kiq = &adev->gfx.kiq;
2052         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2053         if (r)
2054                 return r;
2055
2056         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2057         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2058         if (r)
2059                 return r;
2060
2061         adev->gfx.ce_ram_size = 0x8000;
2062
2063         r = gfx_v8_0_gpu_early_init(adev);
2064         if (r)
2065                 return r;
2066
2067         return 0;
2068 }
2069
2070 static int gfx_v8_0_sw_fini(void *handle)
2071 {
2072         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2073         int i;
2074
2075         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2076                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2077         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2078                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2079
2080         amdgpu_gfx_mqd_sw_fini(adev);
2081         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2082         amdgpu_gfx_kiq_fini(adev);
2083
2084         gfx_v8_0_mec_fini(adev);
2085         amdgpu_gfx_rlc_fini(adev);
2086         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2087                                 &adev->gfx.rlc.clear_state_gpu_addr,
2088                                 (void **)&adev->gfx.rlc.cs_ptr);
2089         if ((adev->asic_type == CHIP_CARRIZO) ||
2090             (adev->asic_type == CHIP_STONEY)) {
2091                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2092                                 &adev->gfx.rlc.cp_table_gpu_addr,
2093                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2094         }
2095         gfx_v8_0_free_microcode(adev);
2096
2097         return 0;
2098 }
2099
2100 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2101 {
2102         uint32_t *modearray, *mod2array;
2103         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2104         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2105         u32 reg_offset;
2106
2107         modearray = adev->gfx.config.tile_mode_array;
2108         mod2array = adev->gfx.config.macrotile_mode_array;
2109
2110         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2111                 modearray[reg_offset] = 0;
2112
2113         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2114                 mod2array[reg_offset] = 0;
2115
2116         switch (adev->asic_type) {
2117         case CHIP_TOPAZ:
2118                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2119                                 PIPE_CONFIG(ADDR_SURF_P2) |
2120                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2122                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123                                 PIPE_CONFIG(ADDR_SURF_P2) |
2124                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2125                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2126                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127                                 PIPE_CONFIG(ADDR_SURF_P2) |
2128                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2129                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2130                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2131                                 PIPE_CONFIG(ADDR_SURF_P2) |
2132                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2133                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2134                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135                                 PIPE_CONFIG(ADDR_SURF_P2) |
2136                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2137                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2138                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2139                                 PIPE_CONFIG(ADDR_SURF_P2) |
2140                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2141                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2142                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2143                                 PIPE_CONFIG(ADDR_SURF_P2) |
2144                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2145                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2147                                 PIPE_CONFIG(ADDR_SURF_P2));
2148                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2149                                 PIPE_CONFIG(ADDR_SURF_P2) |
2150                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2151                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2152                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153                                  PIPE_CONFIG(ADDR_SURF_P2) |
2154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2156                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2157                                  PIPE_CONFIG(ADDR_SURF_P2) |
2158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2160                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2161                                  PIPE_CONFIG(ADDR_SURF_P2) |
2162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2164                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165                                  PIPE_CONFIG(ADDR_SURF_P2) |
2166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2168                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2169                                  PIPE_CONFIG(ADDR_SURF_P2) |
2170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173                                  PIPE_CONFIG(ADDR_SURF_P2) |
2174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2176                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2177                                  PIPE_CONFIG(ADDR_SURF_P2) |
2178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2180                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2181                                  PIPE_CONFIG(ADDR_SURF_P2) |
2182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2184                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2185                                  PIPE_CONFIG(ADDR_SURF_P2) |
2186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2188                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2189                                  PIPE_CONFIG(ADDR_SURF_P2) |
2190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2192                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2193                                  PIPE_CONFIG(ADDR_SURF_P2) |
2194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2196                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2197                                  PIPE_CONFIG(ADDR_SURF_P2) |
2198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2200                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2201                                  PIPE_CONFIG(ADDR_SURF_P2) |
2202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2204                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2205                                  PIPE_CONFIG(ADDR_SURF_P2) |
2206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2208                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2209                                  PIPE_CONFIG(ADDR_SURF_P2) |
2210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2212                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2213                                  PIPE_CONFIG(ADDR_SURF_P2) |
2214                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2215                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2216                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2217                                  PIPE_CONFIG(ADDR_SURF_P2) |
2218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2220
2221                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2222                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2224                                 NUM_BANKS(ADDR_SURF_8_BANK));
2225                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2226                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2227                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2228                                 NUM_BANKS(ADDR_SURF_8_BANK));
2229                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2230                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2231                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2232                                 NUM_BANKS(ADDR_SURF_8_BANK));
2233                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2235                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2236                                 NUM_BANKS(ADDR_SURF_8_BANK));
2237                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2239                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240                                 NUM_BANKS(ADDR_SURF_8_BANK));
2241                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2242                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2243                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2244                                 NUM_BANKS(ADDR_SURF_8_BANK));
2245                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2246                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2247                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2248                                 NUM_BANKS(ADDR_SURF_8_BANK));
2249                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2250                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2251                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252                                 NUM_BANKS(ADDR_SURF_16_BANK));
2253                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2254                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2255                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2256                                 NUM_BANKS(ADDR_SURF_16_BANK));
2257                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2258                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2260                                  NUM_BANKS(ADDR_SURF_16_BANK));
2261                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2262                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2263                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2264                                  NUM_BANKS(ADDR_SURF_16_BANK));
2265                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2267                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268                                  NUM_BANKS(ADDR_SURF_16_BANK));
2269                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2270                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2271                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2272                                  NUM_BANKS(ADDR_SURF_16_BANK));
2273                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276                                  NUM_BANKS(ADDR_SURF_8_BANK));
2277
2278                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2279                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2280                             reg_offset != 23)
2281                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2282
2283                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2284                         if (reg_offset != 7)
2285                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2286
2287                 break;
2288         case CHIP_FIJI:
2289         case CHIP_VEGAM:
2290                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2293                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2294                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2298                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2301                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2302                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2305                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2306                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2309                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2311                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2313                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2315                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2317                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2320                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2321                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2323                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2324                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2325                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2328                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2333                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2336                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2337                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2340                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2341                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2345                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2349                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2352                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2356                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2357                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2360                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2361                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2364                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2365                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2368                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2369                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2372                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2373                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2376                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2377                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2381                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2382                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2385                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2389                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2393                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2397                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2400                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2401                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2403                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2405                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2407                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2408                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2409                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2410                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2411                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2412
2413                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2415                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2416                                 NUM_BANKS(ADDR_SURF_8_BANK));
2417                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2418                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2420                                 NUM_BANKS(ADDR_SURF_8_BANK));
2421                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2423                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2424                                 NUM_BANKS(ADDR_SURF_8_BANK));
2425                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2427                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2428                                 NUM_BANKS(ADDR_SURF_8_BANK));
2429                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2431                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2432                                 NUM_BANKS(ADDR_SURF_8_BANK));
2433                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2435                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2436                                 NUM_BANKS(ADDR_SURF_8_BANK));
2437                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2439                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2440                                 NUM_BANKS(ADDR_SURF_8_BANK));
2441                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2443                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444                                 NUM_BANKS(ADDR_SURF_8_BANK));
2445                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448                                 NUM_BANKS(ADDR_SURF_8_BANK));
2449                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452                                  NUM_BANKS(ADDR_SURF_8_BANK));
2453                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456                                  NUM_BANKS(ADDR_SURF_8_BANK));
2457                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2459                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460                                  NUM_BANKS(ADDR_SURF_8_BANK));
2461                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464                                  NUM_BANKS(ADDR_SURF_8_BANK));
2465                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                                  NUM_BANKS(ADDR_SURF_4_BANK));
2469
2470                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2471                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2472
2473                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2474                         if (reg_offset != 7)
2475                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2476
2477                 break;
2478         case CHIP_TONGA:
2479                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2482                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2483                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2486                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2487                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2488                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2490                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2491                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2492                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2494                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2495                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2498                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2500                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2502                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2504                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2506                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2508                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2509                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2510                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2512                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2513                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2514                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2516                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2517                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2520                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2521                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2522                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2524                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2525                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2526                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2527                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2528                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2529                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2532                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2536                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2538                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2540                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2541                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2544                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2546                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2547                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2548                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2549                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2550                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2552                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2553                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2554                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2556                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2557                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2558                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2560                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2561                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2562                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2564                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2565                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2566                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2568                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2570                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2571                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2574                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2576                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2578                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2582                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2586                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2589                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2592                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2593                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2594                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2597                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2598                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2601
2602                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2604                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2605                                 NUM_BANKS(ADDR_SURF_16_BANK));
2606                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2609                                 NUM_BANKS(ADDR_SURF_16_BANK));
2610                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2612                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2613                                 NUM_BANKS(ADDR_SURF_16_BANK));
2614                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2616                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2617                                 NUM_BANKS(ADDR_SURF_16_BANK));
2618                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2620                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2621                                 NUM_BANKS(ADDR_SURF_16_BANK));
2622                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2624                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2625                                 NUM_BANKS(ADDR_SURF_16_BANK));
2626                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2628                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2629                                 NUM_BANKS(ADDR_SURF_16_BANK));
2630                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2632                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633                                 NUM_BANKS(ADDR_SURF_16_BANK));
2634                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2636                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2637                                 NUM_BANKS(ADDR_SURF_16_BANK));
2638                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2641                                  NUM_BANKS(ADDR_SURF_16_BANK));
2642                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2645                                  NUM_BANKS(ADDR_SURF_16_BANK));
2646                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2648                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2649                                  NUM_BANKS(ADDR_SURF_8_BANK));
2650                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2653                                  NUM_BANKS(ADDR_SURF_4_BANK));
2654                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2657                                  NUM_BANKS(ADDR_SURF_4_BANK));
2658
2659                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2660                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2661
2662                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2663                         if (reg_offset != 7)
2664                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2665
2666                 break;
2667         case CHIP_POLARIS11:
2668         case CHIP_POLARIS12:
2669                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2672                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2673                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2676                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2677                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2680                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2681                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2684                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2685                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2688                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2692                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2696                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2700                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2702                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2703                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2704                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2706                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2715                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2716                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2719                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2722                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2726                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2730                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2735                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2736                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2739                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2743                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2747                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2751                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2755                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2776                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2778                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2779                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2780                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2782                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2784                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2786                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2787                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2788                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2790                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2791
2792                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2794                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2795                                 NUM_BANKS(ADDR_SURF_16_BANK));
2796
2797                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2798                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2799                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2800                                 NUM_BANKS(ADDR_SURF_16_BANK));
2801
2802                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2803                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2804                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2805                                 NUM_BANKS(ADDR_SURF_16_BANK));
2806
2807                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2809                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2810                                 NUM_BANKS(ADDR_SURF_16_BANK));
2811
2812                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2815                                 NUM_BANKS(ADDR_SURF_16_BANK));
2816
2817                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2819                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2820                                 NUM_BANKS(ADDR_SURF_16_BANK));
2821
2822                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2824                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2825                                 NUM_BANKS(ADDR_SURF_16_BANK));
2826
2827                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2828                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2829                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830                                 NUM_BANKS(ADDR_SURF_16_BANK));
2831
2832                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836
2837                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2839                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2840                                 NUM_BANKS(ADDR_SURF_16_BANK));
2841
2842                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2844                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2845                                 NUM_BANKS(ADDR_SURF_16_BANK));
2846
2847                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2850                                 NUM_BANKS(ADDR_SURF_16_BANK));
2851
2852                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855                                 NUM_BANKS(ADDR_SURF_8_BANK));
2856
2857                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2860                                 NUM_BANKS(ADDR_SURF_4_BANK));
2861
2862                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2863                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2864
2865                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2866                         if (reg_offset != 7)
2867                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2868
2869                 break;
2870         case CHIP_POLARIS10:
2871                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2874                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2875                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2878                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2879                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2882                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2883                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2886                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2887                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2892                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2900                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2904                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2905                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2906                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2908                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2909                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2913                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2914                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2917                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2919                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2920                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2921                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2924                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2928                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2932                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2936                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2937                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2938                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2939                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2941                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2942                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2945                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2946                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2949                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2953                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2954                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2957                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2962                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2970                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2974                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2978                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2980                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2981                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2982                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2984                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2985                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2986                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2988                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2989                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2990                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2991                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2992                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2993
2994                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2995                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2996                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2997                                 NUM_BANKS(ADDR_SURF_16_BANK));
2998
2999                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3001                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002                                 NUM_BANKS(ADDR_SURF_16_BANK));
3003
3004                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3006                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007                                 NUM_BANKS(ADDR_SURF_16_BANK));
3008
3009                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3011                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3012                                 NUM_BANKS(ADDR_SURF_16_BANK));
3013
3014                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3015                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3016                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3017                                 NUM_BANKS(ADDR_SURF_16_BANK));
3018
3019                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3021                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3022                                 NUM_BANKS(ADDR_SURF_16_BANK));
3023
3024                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3026                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3027                                 NUM_BANKS(ADDR_SURF_16_BANK));
3028
3029                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3031                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032                                 NUM_BANKS(ADDR_SURF_16_BANK));
3033
3034                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037                                 NUM_BANKS(ADDR_SURF_16_BANK));
3038
3039                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3041                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042                                 NUM_BANKS(ADDR_SURF_16_BANK));
3043
3044                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3046                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3047                                 NUM_BANKS(ADDR_SURF_16_BANK));
3048
3049                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052                                 NUM_BANKS(ADDR_SURF_8_BANK));
3053
3054                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3056                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3057                                 NUM_BANKS(ADDR_SURF_4_BANK));
3058
3059                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3061                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3062                                 NUM_BANKS(ADDR_SURF_4_BANK));
3063
3064                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3065                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3066
3067                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3068                         if (reg_offset != 7)
3069                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3070
3071                 break;
3072         case CHIP_STONEY:
3073                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3074                                 PIPE_CONFIG(ADDR_SURF_P2) |
3075                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3076                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3077                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3078                                 PIPE_CONFIG(ADDR_SURF_P2) |
3079                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3080                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3081                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3082                                 PIPE_CONFIG(ADDR_SURF_P2) |
3083                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3084                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3085                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3086                                 PIPE_CONFIG(ADDR_SURF_P2) |
3087                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3088                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3089                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090                                 PIPE_CONFIG(ADDR_SURF_P2) |
3091                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3092                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3094                                 PIPE_CONFIG(ADDR_SURF_P2) |
3095                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3096                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3098                                 PIPE_CONFIG(ADDR_SURF_P2) |
3099                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3100                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3102                                 PIPE_CONFIG(ADDR_SURF_P2));
3103                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3104                                 PIPE_CONFIG(ADDR_SURF_P2) |
3105                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3106                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3107                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108                                  PIPE_CONFIG(ADDR_SURF_P2) |
3109                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3110                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112                                  PIPE_CONFIG(ADDR_SURF_P2) |
3113                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3114                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3115                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3116                                  PIPE_CONFIG(ADDR_SURF_P2) |
3117                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3118                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3119                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3120                                  PIPE_CONFIG(ADDR_SURF_P2) |
3121                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3122                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3123                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3124                                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3126                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128                                  PIPE_CONFIG(ADDR_SURF_P2) |
3129                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3130                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3131                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3132                                  PIPE_CONFIG(ADDR_SURF_P2) |
3133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3135                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3136                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3138                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3139                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3140                                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3142                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3143                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3147                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3167                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3168                                  PIPE_CONFIG(ADDR_SURF_P2) |
3169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3171                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3172                                  PIPE_CONFIG(ADDR_SURF_P2) |
3173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3175
3176                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3178                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3179                                 NUM_BANKS(ADDR_SURF_8_BANK));
3180                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3182                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3183                                 NUM_BANKS(ADDR_SURF_8_BANK));
3184                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187                                 NUM_BANKS(ADDR_SURF_8_BANK));
3188                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3189                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3190                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3191                                 NUM_BANKS(ADDR_SURF_8_BANK));
3192                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3194                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3195                                 NUM_BANKS(ADDR_SURF_8_BANK));
3196                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3198                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3199                                 NUM_BANKS(ADDR_SURF_8_BANK));
3200                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203                                 NUM_BANKS(ADDR_SURF_8_BANK));
3204                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3205                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3206                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3207                                 NUM_BANKS(ADDR_SURF_16_BANK));
3208                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3209                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3210                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211                                 NUM_BANKS(ADDR_SURF_16_BANK));
3212                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3213                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3214                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215                                  NUM_BANKS(ADDR_SURF_16_BANK));
3216                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3217                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3218                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3219                                  NUM_BANKS(ADDR_SURF_16_BANK));
3220                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3222                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223                                  NUM_BANKS(ADDR_SURF_16_BANK));
3224                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227                                  NUM_BANKS(ADDR_SURF_16_BANK));
3228                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3230                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3231                                  NUM_BANKS(ADDR_SURF_8_BANK));
3232
3233                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3234                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3235                             reg_offset != 23)
3236                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3237
3238                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3239                         if (reg_offset != 7)
3240                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3241
3242                 break;
3243         default:
3244                 dev_warn(adev->dev,
3245                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3246                          adev->asic_type);
3247                 fallthrough;
3248
3249         case CHIP_CARRIZO:
3250                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3251                                 PIPE_CONFIG(ADDR_SURF_P2) |
3252                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3253                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3254                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255                                 PIPE_CONFIG(ADDR_SURF_P2) |
3256                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3257                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3258                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259                                 PIPE_CONFIG(ADDR_SURF_P2) |
3260                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3261                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3262                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3263                                 PIPE_CONFIG(ADDR_SURF_P2) |
3264                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3265                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3266                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3267                                 PIPE_CONFIG(ADDR_SURF_P2) |
3268                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3269                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3270                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3271                                 PIPE_CONFIG(ADDR_SURF_P2) |
3272                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3273                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3274                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3275                                 PIPE_CONFIG(ADDR_SURF_P2) |
3276                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3277                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3278                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3279                                 PIPE_CONFIG(ADDR_SURF_P2));
3280                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3281                                 PIPE_CONFIG(ADDR_SURF_P2) |
3282                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3283                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3284                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3285                                  PIPE_CONFIG(ADDR_SURF_P2) |
3286                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3287                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3288                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3289                                  PIPE_CONFIG(ADDR_SURF_P2) |
3290                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3291                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3292                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3293                                  PIPE_CONFIG(ADDR_SURF_P2) |
3294                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3295                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3296                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3297                                  PIPE_CONFIG(ADDR_SURF_P2) |
3298                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3299                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3300                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3301                                  PIPE_CONFIG(ADDR_SURF_P2) |
3302                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3303                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3305                                  PIPE_CONFIG(ADDR_SURF_P2) |
3306                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3307                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3308                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3309                                  PIPE_CONFIG(ADDR_SURF_P2) |
3310                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3311                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3312                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3313                                  PIPE_CONFIG(ADDR_SURF_P2) |
3314                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3315                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3316                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3317                                  PIPE_CONFIG(ADDR_SURF_P2) |
3318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3320                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3321                                  PIPE_CONFIG(ADDR_SURF_P2) |
3322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3324                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3325                                  PIPE_CONFIG(ADDR_SURF_P2) |
3326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3328                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3329                                  PIPE_CONFIG(ADDR_SURF_P2) |
3330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3332                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3333                                  PIPE_CONFIG(ADDR_SURF_P2) |
3334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3336                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3337                                  PIPE_CONFIG(ADDR_SURF_P2) |
3338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3341                                  PIPE_CONFIG(ADDR_SURF_P2) |
3342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3344                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3345                                  PIPE_CONFIG(ADDR_SURF_P2) |
3346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3348                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3349                                  PIPE_CONFIG(ADDR_SURF_P2) |
3350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3352
3353                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3354                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3355                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3356                                 NUM_BANKS(ADDR_SURF_8_BANK));
3357                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3359                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3360                                 NUM_BANKS(ADDR_SURF_8_BANK));
3361                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3363                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3364                                 NUM_BANKS(ADDR_SURF_8_BANK));
3365                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3366                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3367                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3368                                 NUM_BANKS(ADDR_SURF_8_BANK));
3369                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3370                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3371                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3372                                 NUM_BANKS(ADDR_SURF_8_BANK));
3373                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3374                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3375                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3376                                 NUM_BANKS(ADDR_SURF_8_BANK));
3377                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3378                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3379                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3380                                 NUM_BANKS(ADDR_SURF_8_BANK));
3381                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3382                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3383                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384                                 NUM_BANKS(ADDR_SURF_16_BANK));
3385                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3386                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3387                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388                                 NUM_BANKS(ADDR_SURF_16_BANK));
3389                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3390                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3391                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3392                                  NUM_BANKS(ADDR_SURF_16_BANK));
3393                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3394                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3395                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3396                                  NUM_BANKS(ADDR_SURF_16_BANK));
3397                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3399                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3400                                  NUM_BANKS(ADDR_SURF_16_BANK));
3401                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3404                                  NUM_BANKS(ADDR_SURF_16_BANK));
3405                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3406                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3407                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3408                                  NUM_BANKS(ADDR_SURF_8_BANK));
3409
3410                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3411                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3412                             reg_offset != 23)
3413                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3414
3415                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3416                         if (reg_offset != 7)
3417                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3418
3419                 break;
3420         }
3421 }
3422
3423 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3424                                   u32 se_num, u32 sh_num, u32 instance)
3425 {
3426         u32 data;
3427
3428         if (instance == 0xffffffff)
3429                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3430         else
3431                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3432
3433         if (se_num == 0xffffffff)
3434                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3435         else
3436                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3437
3438         if (sh_num == 0xffffffff)
3439                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3440         else
3441                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3442
3443         WREG32(mmGRBM_GFX_INDEX, data);
3444 }
3445
3446 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3447                                   u32 me, u32 pipe, u32 q, u32 vm)
3448 {
3449         vi_srbm_select(adev, me, pipe, q, vm);
3450 }
3451
3452 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3453 {
3454         u32 data, mask;
3455
3456         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3457                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3458
3459         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3460
3461         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3462                                          adev->gfx.config.max_sh_per_se);
3463
3464         return (~data) & mask;
3465 }
3466
3467 static void
3468 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3469 {
3470         switch (adev->asic_type) {
3471         case CHIP_FIJI:
3472         case CHIP_VEGAM:
3473                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3474                           RB_XSEL2(1) | PKR_MAP(2) |
3475                           PKR_XSEL(1) | PKR_YSEL(1) |
3476                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3477                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3478                            SE_PAIR_YSEL(2);
3479                 break;
3480         case CHIP_TONGA:
3481         case CHIP_POLARIS10:
3482                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3483                           SE_XSEL(1) | SE_YSEL(1);
3484                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3485                            SE_PAIR_YSEL(2);
3486                 break;
3487         case CHIP_TOPAZ:
3488         case CHIP_CARRIZO:
3489                 *rconf |= RB_MAP_PKR0(2);
3490                 *rconf1 |= 0x0;
3491                 break;
3492         case CHIP_POLARIS11:
3493         case CHIP_POLARIS12:
3494                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3495                           SE_XSEL(1) | SE_YSEL(1);
3496                 *rconf1 |= 0x0;
3497                 break;
3498         case CHIP_STONEY:
3499                 *rconf |= 0x0;
3500                 *rconf1 |= 0x0;
3501                 break;
3502         default:
3503                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3504                 break;
3505         }
3506 }
3507
3508 static void
3509 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3510                                         u32 raster_config, u32 raster_config_1,
3511                                         unsigned rb_mask, unsigned num_rb)
3512 {
3513         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3514         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3515         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3516         unsigned rb_per_se = num_rb / num_se;
3517         unsigned se_mask[4];
3518         unsigned se;
3519
3520         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3521         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3522         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3523         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3524
3525         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3526         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3527         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3528
3529         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3530                              (!se_mask[2] && !se_mask[3]))) {
3531                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3532
3533                 if (!se_mask[0] && !se_mask[1]) {
3534                         raster_config_1 |=
3535                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3536                 } else {
3537                         raster_config_1 |=
3538                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3539                 }
3540         }
3541
3542         for (se = 0; se < num_se; se++) {
3543                 unsigned raster_config_se = raster_config;
3544                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3545                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3546                 int idx = (se / 2) * 2;
3547
3548                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3549                         raster_config_se &= ~SE_MAP_MASK;
3550
3551                         if (!se_mask[idx]) {
3552                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3553                         } else {
3554                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3555                         }
3556                 }
3557
3558                 pkr0_mask &= rb_mask;
3559                 pkr1_mask &= rb_mask;
3560                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3561                         raster_config_se &= ~PKR_MAP_MASK;
3562
3563                         if (!pkr0_mask) {
3564                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3565                         } else {
3566                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3567                         }
3568                 }
3569
3570                 if (rb_per_se >= 2) {
3571                         unsigned rb0_mask = 1 << (se * rb_per_se);
3572                         unsigned rb1_mask = rb0_mask << 1;
3573
3574                         rb0_mask &= rb_mask;
3575                         rb1_mask &= rb_mask;
3576                         if (!rb0_mask || !rb1_mask) {
3577                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3578
3579                                 if (!rb0_mask) {
3580                                         raster_config_se |=
3581                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3582                                 } else {
3583                                         raster_config_se |=
3584                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3585                                 }
3586                         }
3587
3588                         if (rb_per_se > 2) {
3589                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3590                                 rb1_mask = rb0_mask << 1;
3591                                 rb0_mask &= rb_mask;
3592                                 rb1_mask &= rb_mask;
3593                                 if (!rb0_mask || !rb1_mask) {
3594                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3595
3596                                         if (!rb0_mask) {
3597                                                 raster_config_se |=
3598                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3599                                         } else {
3600                                                 raster_config_se |=
3601                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3602                                         }
3603                                 }
3604                         }
3605                 }
3606
3607                 /* GRBM_GFX_INDEX has a different offset on VI */
3608                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3609                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3610                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3611         }
3612
3613         /* GRBM_GFX_INDEX has a different offset on VI */
3614         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3615 }
3616
3617 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3618 {
3619         int i, j;
3620         u32 data;
3621         u32 raster_config = 0, raster_config_1 = 0;
3622         u32 active_rbs = 0;
3623         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3624                                         adev->gfx.config.max_sh_per_se;
3625         unsigned num_rb_pipes;
3626
3627         mutex_lock(&adev->grbm_idx_mutex);
3628         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3629                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3630                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3631                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3632                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3633                                                rb_bitmap_width_per_sh);
3634                 }
3635         }
3636         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3637
3638         adev->gfx.config.backend_enable_mask = active_rbs;
3639         adev->gfx.config.num_rbs = hweight32(active_rbs);
3640
3641         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3642                              adev->gfx.config.max_shader_engines, 16);
3643
3644         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3645
3646         if (!adev->gfx.config.backend_enable_mask ||
3647                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3648                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3649                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3650         } else {
3651                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3652                                                         adev->gfx.config.backend_enable_mask,
3653                                                         num_rb_pipes);
3654         }
3655
3656         /* cache the values for userspace */
3657         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3658                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3659                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3660                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3661                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3662                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3663                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3664                         adev->gfx.config.rb_config[i][j].raster_config =
3665                                 RREG32(mmPA_SC_RASTER_CONFIG);
3666                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3667                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3668                 }
3669         }
3670         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3671         mutex_unlock(&adev->grbm_idx_mutex);
3672 }
3673
3674 #define DEFAULT_SH_MEM_BASES    (0x6000)
3675 /**
3676  * gfx_v8_0_init_compute_vmid - gart enable
3677  *
3678  * @adev: amdgpu_device pointer
3679  *
3680  * Initialize compute vmid sh_mem registers
3681  *
3682  */
3683 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3684 {
3685         int i;
3686         uint32_t sh_mem_config;
3687         uint32_t sh_mem_bases;
3688
3689         /*
3690          * Configure apertures:
3691          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3692          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3693          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3694          */
3695         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3696
3697         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3698                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3699                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3700                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3701                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3702                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3703
3704         mutex_lock(&adev->srbm_mutex);
3705         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3706                 vi_srbm_select(adev, 0, 0, 0, i);
3707                 /* CP and shaders */
3708                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3709                 WREG32(mmSH_MEM_APE1_BASE, 1);
3710                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3711                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3712         }
3713         vi_srbm_select(adev, 0, 0, 0, 0);
3714         mutex_unlock(&adev->srbm_mutex);
3715
3716         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3717            access. These should be enabled by FW for target VMIDs. */
3718         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3719                 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3720                 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3721                 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3722                 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3723         }
3724 }
3725
3726 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3727 {
3728         int vmid;
3729
3730         /*
3731          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3732          * access. Compute VMIDs should be enabled by FW for target VMIDs,
3733          * the driver can enable them for graphics. VMID0 should maintain
3734          * access so that HWS firmware can save/restore entries.
3735          */
3736         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3737                 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3738                 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3739                 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3740                 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3741         }
3742 }
3743
3744 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3745 {
3746         switch (adev->asic_type) {
3747         default:
3748                 adev->gfx.config.double_offchip_lds_buf = 1;
3749                 break;
3750         case CHIP_CARRIZO:
3751         case CHIP_STONEY:
3752                 adev->gfx.config.double_offchip_lds_buf = 0;
3753                 break;
3754         }
3755 }
3756
3757 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3758 {
3759         u32 tmp, sh_static_mem_cfg;
3760         int i;
3761
3762         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3763         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3764         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3765         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3766
3767         gfx_v8_0_tiling_mode_table_init(adev);
3768         gfx_v8_0_setup_rb(adev);
3769         gfx_v8_0_get_cu_info(adev);
3770         gfx_v8_0_config_init(adev);
3771
3772         /* XXX SH_MEM regs */
3773         /* where to put LDS, scratch, GPUVM in FSA64 space */
3774         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3775                                    SWIZZLE_ENABLE, 1);
3776         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3777                                    ELEMENT_SIZE, 1);
3778         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3779                                    INDEX_STRIDE, 3);
3780         WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3781
3782         mutex_lock(&adev->srbm_mutex);
3783         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3784                 vi_srbm_select(adev, 0, 0, 0, i);
3785                 /* CP and shaders */
3786                 if (i == 0) {
3787                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3788                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3789                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3790                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3791                         WREG32(mmSH_MEM_CONFIG, tmp);
3792                         WREG32(mmSH_MEM_BASES, 0);
3793                 } else {
3794                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3795                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3796                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3797                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3798                         WREG32(mmSH_MEM_CONFIG, tmp);
3799                         tmp = adev->gmc.shared_aperture_start >> 48;
3800                         WREG32(mmSH_MEM_BASES, tmp);
3801                 }
3802
3803                 WREG32(mmSH_MEM_APE1_BASE, 1);
3804                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3805         }
3806         vi_srbm_select(adev, 0, 0, 0, 0);
3807         mutex_unlock(&adev->srbm_mutex);
3808
3809         gfx_v8_0_init_compute_vmid(adev);
3810         gfx_v8_0_init_gds_vmid(adev);
3811
3812         mutex_lock(&adev->grbm_idx_mutex);
3813         /*
3814          * making sure that the following register writes will be broadcasted
3815          * to all the shaders
3816          */
3817         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3818
3819         WREG32(mmPA_SC_FIFO_SIZE,
3820                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3821                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3822                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3823                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3824                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3825                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3826                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3827                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3828
3829         tmp = RREG32(mmSPI_ARB_PRIORITY);
3830         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3831         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3832         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3833         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3834         WREG32(mmSPI_ARB_PRIORITY, tmp);
3835
3836         mutex_unlock(&adev->grbm_idx_mutex);
3837
3838 }
3839
3840 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3841 {
3842         u32 i, j, k;
3843         u32 mask;
3844
3845         mutex_lock(&adev->grbm_idx_mutex);
3846         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3847                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3848                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3849                         for (k = 0; k < adev->usec_timeout; k++) {
3850                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3851                                         break;
3852                                 udelay(1);
3853                         }
3854                         if (k == adev->usec_timeout) {
3855                                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3856                                                       0xffffffff, 0xffffffff);
3857                                 mutex_unlock(&adev->grbm_idx_mutex);
3858                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3859                                          i, j);
3860                                 return;
3861                         }
3862                 }
3863         }
3864         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3865         mutex_unlock(&adev->grbm_idx_mutex);
3866
3867         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3868                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3869                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3870                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3871         for (k = 0; k < adev->usec_timeout; k++) {
3872                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3873                         break;
3874                 udelay(1);
3875         }
3876 }
3877
3878 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3879                                                bool enable)
3880 {
3881         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3882
3883         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3884         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3885         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3886         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3887
3888         WREG32(mmCP_INT_CNTL_RING0, tmp);
3889 }
3890
3891 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3892 {
3893         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3894         /* csib */
3895         WREG32(mmRLC_CSIB_ADDR_HI,
3896                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3897         WREG32(mmRLC_CSIB_ADDR_LO,
3898                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3899         WREG32(mmRLC_CSIB_LENGTH,
3900                         adev->gfx.rlc.clear_state_size);
3901 }
3902
3903 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3904                                 int ind_offset,
3905                                 int list_size,
3906                                 int *unique_indices,
3907                                 int *indices_count,
3908                                 int max_indices,
3909                                 int *ind_start_offsets,
3910                                 int *offset_count,
3911                                 int max_offset)
3912 {
3913         int indices;
3914         bool new_entry = true;
3915
3916         for (; ind_offset < list_size; ind_offset++) {
3917
3918                 if (new_entry) {
3919                         new_entry = false;
3920                         ind_start_offsets[*offset_count] = ind_offset;
3921                         *offset_count = *offset_count + 1;
3922                         BUG_ON(*offset_count >= max_offset);
3923                 }
3924
3925                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3926                         new_entry = true;
3927                         continue;
3928                 }
3929
3930                 ind_offset += 2;
3931
3932                 /* look for the matching indice */
3933                 for (indices = 0;
3934                         indices < *indices_count;
3935                         indices++) {
3936                         if (unique_indices[indices] ==
3937                                 register_list_format[ind_offset])
3938                                 break;
3939                 }
3940
3941                 if (indices >= *indices_count) {
3942                         unique_indices[*indices_count] =
3943                                 register_list_format[ind_offset];
3944                         indices = *indices_count;
3945                         *indices_count = *indices_count + 1;
3946                         BUG_ON(*indices_count >= max_indices);
3947                 }
3948
3949                 register_list_format[ind_offset] = indices;
3950         }
3951 }
3952
3953 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3954 {
3955         int i, temp, data;
3956         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3957         int indices_count = 0;
3958         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3959         int offset_count = 0;
3960
3961         int list_size;
3962         unsigned int *register_list_format =
3963                 kmemdup(adev->gfx.rlc.register_list_format,
3964                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3965         if (!register_list_format)
3966                 return -ENOMEM;
3967
3968         gfx_v8_0_parse_ind_reg_list(register_list_format,
3969                                 RLC_FormatDirectRegListLength,
3970                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3971                                 unique_indices,
3972                                 &indices_count,
3973                                 ARRAY_SIZE(unique_indices),
3974                                 indirect_start_offsets,
3975                                 &offset_count,
3976                                 ARRAY_SIZE(indirect_start_offsets));
3977
3978         /* save and restore list */
3979         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3980
3981         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3982         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3983                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3984
3985         /* indirect list */
3986         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3987         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3988                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3989
3990         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3991         list_size = list_size >> 1;
3992         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3993         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3994
3995         /* starting offsets starts */
3996         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3997                 adev->gfx.rlc.starting_offsets_start);
3998         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3999                 WREG32(mmRLC_GPM_SCRATCH_DATA,
4000                                 indirect_start_offsets[i]);
4001
4002         /* unique indices */
4003         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4004         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4005         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4006                 if (unique_indices[i] != 0) {
4007                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4008                         WREG32(data + i, unique_indices[i] >> 20);
4009                 }
4010         }
4011         kfree(register_list_format);
4012
4013         return 0;
4014 }
4015
4016 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4017 {
4018         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4019 }
4020
4021 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4022 {
4023         uint32_t data;
4024
4025         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4026
4027         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4028         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4029         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4030         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4031         WREG32(mmRLC_PG_DELAY, data);
4032
4033         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4034         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4035
4036 }
4037
4038 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4039                                                 bool enable)
4040 {
4041         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4042 }
4043
4044 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4045                                                   bool enable)
4046 {
4047         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4048 }
4049
4050 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4051 {
4052         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4053 }
4054
4055 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4056 {
4057         if ((adev->asic_type == CHIP_CARRIZO) ||
4058             (adev->asic_type == CHIP_STONEY)) {
4059                 gfx_v8_0_init_csb(adev);
4060                 gfx_v8_0_init_save_restore_list(adev);
4061                 gfx_v8_0_enable_save_restore_machine(adev);
4062                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4063                 gfx_v8_0_init_power_gating(adev);
4064                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4065         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4066                    (adev->asic_type == CHIP_POLARIS12) ||
4067                    (adev->asic_type == CHIP_VEGAM)) {
4068                 gfx_v8_0_init_csb(adev);
4069                 gfx_v8_0_init_save_restore_list(adev);
4070                 gfx_v8_0_enable_save_restore_machine(adev);
4071                 gfx_v8_0_init_power_gating(adev);
4072         }
4073
4074 }
4075
4076 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4077 {
4078         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4079
4080         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4081         gfx_v8_0_wait_for_rlc_serdes(adev);
4082 }
4083
4084 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4085 {
4086         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4087         udelay(50);
4088
4089         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4090         udelay(50);
4091 }
4092
4093 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4094 {
4095         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4096
4097         /* carrizo do enable cp interrupt after cp inited */
4098         if (!(adev->flags & AMD_IS_APU))
4099                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4100
4101         udelay(50);
4102 }
4103
4104 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4105 {
4106         if (amdgpu_sriov_vf(adev)) {
4107                 gfx_v8_0_init_csb(adev);
4108                 return 0;
4109         }
4110
4111         adev->gfx.rlc.funcs->stop(adev);
4112         adev->gfx.rlc.funcs->reset(adev);
4113         gfx_v8_0_init_pg(adev);
4114         adev->gfx.rlc.funcs->start(adev);
4115
4116         return 0;
4117 }
4118
4119 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4120 {
4121         u32 tmp = RREG32(mmCP_ME_CNTL);
4122
4123         if (enable) {
4124                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4125                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4126                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4127         } else {
4128                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4129                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4130                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4131         }
4132         WREG32(mmCP_ME_CNTL, tmp);
4133         udelay(50);
4134 }
4135
4136 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4137 {
4138         u32 count = 0;
4139         const struct cs_section_def *sect = NULL;
4140         const struct cs_extent_def *ext = NULL;
4141
4142         /* begin clear state */
4143         count += 2;
4144         /* context control state */
4145         count += 3;
4146
4147         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4148                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4149                         if (sect->id == SECT_CONTEXT)
4150                                 count += 2 + ext->reg_count;
4151                         else
4152                                 return 0;
4153                 }
4154         }
4155         /* pa_sc_raster_config/pa_sc_raster_config1 */
4156         count += 4;
4157         /* end clear state */
4158         count += 2;
4159         /* clear state */
4160         count += 2;
4161
4162         return count;
4163 }
4164
4165 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4166 {
4167         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4168         const struct cs_section_def *sect = NULL;
4169         const struct cs_extent_def *ext = NULL;
4170         int r, i;
4171
4172         /* init the CP */
4173         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4174         WREG32(mmCP_ENDIAN_SWAP, 0);
4175         WREG32(mmCP_DEVICE_ID, 1);
4176
4177         gfx_v8_0_cp_gfx_enable(adev, true);
4178
4179         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4180         if (r) {
4181                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4182                 return r;
4183         }
4184
4185         /* clear state buffer */
4186         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4187         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4188
4189         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4190         amdgpu_ring_write(ring, 0x80000000);
4191         amdgpu_ring_write(ring, 0x80000000);
4192
4193         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4194                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4195                         if (sect->id == SECT_CONTEXT) {
4196                                 amdgpu_ring_write(ring,
4197                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4198                                                ext->reg_count));
4199                                 amdgpu_ring_write(ring,
4200                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4201                                 for (i = 0; i < ext->reg_count; i++)
4202                                         amdgpu_ring_write(ring, ext->extent[i]);
4203                         }
4204                 }
4205         }
4206
4207         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4208         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4209         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4210         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4211
4212         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4213         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4214
4215         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4216         amdgpu_ring_write(ring, 0);
4217
4218         /* init the CE partitions */
4219         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4220         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4221         amdgpu_ring_write(ring, 0x8000);
4222         amdgpu_ring_write(ring, 0x8000);
4223
4224         amdgpu_ring_commit(ring);
4225
4226         return 0;
4227 }
4228 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4229 {
4230         u32 tmp;
4231         /* no gfx doorbells on iceland */
4232         if (adev->asic_type == CHIP_TOPAZ)
4233                 return;
4234
4235         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4236
4237         if (ring->use_doorbell) {
4238                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4239                                 DOORBELL_OFFSET, ring->doorbell_index);
4240                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4241                                                 DOORBELL_HIT, 0);
4242                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4243                                             DOORBELL_EN, 1);
4244         } else {
4245                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4246         }
4247
4248         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4249
4250         if (adev->flags & AMD_IS_APU)
4251                 return;
4252
4253         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4254                                         DOORBELL_RANGE_LOWER,
4255                                         adev->doorbell_index.gfx_ring0);
4256         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4257
4258         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4259                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4260 }
4261
4262 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4263 {
4264         struct amdgpu_ring *ring;
4265         u32 tmp;
4266         u32 rb_bufsz;
4267         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4268
4269         /* Set the write pointer delay */
4270         WREG32(mmCP_RB_WPTR_DELAY, 0);
4271
4272         /* set the RB to use vmid 0 */
4273         WREG32(mmCP_RB_VMID, 0);
4274
4275         /* Set ring buffer size */
4276         ring = &adev->gfx.gfx_ring[0];
4277         rb_bufsz = order_base_2(ring->ring_size / 8);
4278         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4279         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4280         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4281         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4282 #ifdef __BIG_ENDIAN
4283         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4284 #endif
4285         WREG32(mmCP_RB0_CNTL, tmp);
4286
4287         /* Initialize the ring buffer's read and write pointers */
4288         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4289         ring->wptr = 0;
4290         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4291
4292         /* set the wb address wether it's enabled or not */
4293         rptr_addr = ring->rptr_gpu_addr;
4294         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4295         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4296
4297         wptr_gpu_addr = ring->wptr_gpu_addr;
4298         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4299         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4300         mdelay(1);
4301         WREG32(mmCP_RB0_CNTL, tmp);
4302
4303         rb_addr = ring->gpu_addr >> 8;
4304         WREG32(mmCP_RB0_BASE, rb_addr);
4305         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4306
4307         gfx_v8_0_set_cpg_door_bell(adev, ring);
4308         /* start the ring */
4309         amdgpu_ring_clear_ring(ring);
4310         gfx_v8_0_cp_gfx_start(adev);
4311         ring->sched.ready = true;
4312
4313         return 0;
4314 }
4315
4316 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4317 {
4318         if (enable) {
4319                 WREG32(mmCP_MEC_CNTL, 0);
4320         } else {
4321                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4322                 adev->gfx.kiq.ring.sched.ready = false;
4323         }
4324         udelay(50);
4325 }
4326
4327 /* KIQ functions */
4328 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4329 {
4330         uint32_t tmp;
4331         struct amdgpu_device *adev = ring->adev;
4332
4333         /* tell RLC which is KIQ queue */
4334         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4335         tmp &= 0xffffff00;
4336         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4337         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4338         tmp |= 0x80;
4339         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4340 }
4341
4342 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4343 {
4344         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4345         uint64_t queue_mask = 0;
4346         int r, i;
4347
4348         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4349                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4350                         continue;
4351
4352                 /* This situation may be hit in the future if a new HW
4353                  * generation exposes more than 64 queues. If so, the
4354                  * definition of queue_mask needs updating */
4355                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4356                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4357                         break;
4358                 }
4359
4360                 queue_mask |= (1ull << i);
4361         }
4362
4363         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4364         if (r) {
4365                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4366                 return r;
4367         }
4368         /* set resources */
4369         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4370         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4371         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4372         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4373         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4374         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4375         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4376         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4377         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4378                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4379                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4380                 uint64_t wptr_addr = ring->wptr_gpu_addr;
4381
4382                 /* map queues */
4383                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4384                 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4385                 amdgpu_ring_write(kiq_ring,
4386                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4387                 amdgpu_ring_write(kiq_ring,
4388                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4389                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4390                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4391                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4392                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4393                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4394                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4395                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4396         }
4397
4398         amdgpu_ring_commit(kiq_ring);
4399
4400         return 0;
4401 }
4402
4403 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4404 {
4405         int i, r = 0;
4406
4407         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4408                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4409                 for (i = 0; i < adev->usec_timeout; i++) {
4410                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4411                                 break;
4412                         udelay(1);
4413                 }
4414                 if (i == adev->usec_timeout)
4415                         r = -ETIMEDOUT;
4416         }
4417         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4418         WREG32(mmCP_HQD_PQ_RPTR, 0);
4419         WREG32(mmCP_HQD_PQ_WPTR, 0);
4420
4421         return r;
4422 }
4423
4424 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4425 {
4426         struct amdgpu_device *adev = ring->adev;
4427
4428         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4429                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4430                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4431                         mqd->cp_hqd_queue_priority =
4432                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4433                 }
4434         }
4435 }
4436
4437 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4438 {
4439         struct amdgpu_device *adev = ring->adev;
4440         struct vi_mqd *mqd = ring->mqd_ptr;
4441         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4442         uint32_t tmp;
4443
4444         mqd->header = 0xC0310800;
4445         mqd->compute_pipelinestat_enable = 0x00000001;
4446         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4447         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4448         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4449         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4450         mqd->compute_misc_reserved = 0x00000003;
4451         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4452                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4453         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4454                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4455         eop_base_addr = ring->eop_gpu_addr >> 8;
4456         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4457         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4458
4459         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4460         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4461         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4462                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4463
4464         mqd->cp_hqd_eop_control = tmp;
4465
4466         /* enable doorbell? */
4467         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4468                             CP_HQD_PQ_DOORBELL_CONTROL,
4469                             DOORBELL_EN,
4470                             ring->use_doorbell ? 1 : 0);
4471
4472         mqd->cp_hqd_pq_doorbell_control = tmp;
4473
4474         /* set the pointer to the MQD */
4475         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4476         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4477
4478         /* set MQD vmid to 0 */
4479         tmp = RREG32(mmCP_MQD_CONTROL);
4480         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4481         mqd->cp_mqd_control = tmp;
4482
4483         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4484         hqd_gpu_addr = ring->gpu_addr >> 8;
4485         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4486         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4487
4488         /* set up the HQD, this is similar to CP_RB0_CNTL */
4489         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4490         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4491                             (order_base_2(ring->ring_size / 4) - 1));
4492         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4493                         (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4494 #ifdef __BIG_ENDIAN
4495         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4496 #endif
4497         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4498         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4499         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4500         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4501         mqd->cp_hqd_pq_control = tmp;
4502
4503         /* set the wb address whether it's enabled or not */
4504         wb_gpu_addr = ring->rptr_gpu_addr;
4505         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4506         mqd->cp_hqd_pq_rptr_report_addr_hi =
4507                 upper_32_bits(wb_gpu_addr) & 0xffff;
4508
4509         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4510         wb_gpu_addr = ring->wptr_gpu_addr;
4511         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4512         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4513
4514         tmp = 0;
4515         /* enable the doorbell if requested */
4516         if (ring->use_doorbell) {
4517                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4518                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4519                                 DOORBELL_OFFSET, ring->doorbell_index);
4520
4521                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4522                                          DOORBELL_EN, 1);
4523                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4524                                          DOORBELL_SOURCE, 0);
4525                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4526                                          DOORBELL_HIT, 0);
4527         }
4528
4529         mqd->cp_hqd_pq_doorbell_control = tmp;
4530
4531         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4532         ring->wptr = 0;
4533         mqd->cp_hqd_pq_wptr = ring->wptr;
4534         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4535
4536         /* set the vmid for the queue */
4537         mqd->cp_hqd_vmid = 0;
4538
4539         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4540         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4541         mqd->cp_hqd_persistent_state = tmp;
4542
4543         /* set MTYPE */
4544         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4545         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4546         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4547         mqd->cp_hqd_ib_control = tmp;
4548
4549         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4550         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4551         mqd->cp_hqd_iq_timer = tmp;
4552
4553         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4554         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4555         mqd->cp_hqd_ctx_save_control = tmp;
4556
4557         /* defaults */
4558         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4559         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4560         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4561         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4562         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4563         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4564         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4565         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4566         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4567         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4568         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4569         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4570
4571         /* set static priority for a queue/ring */
4572         gfx_v8_0_mqd_set_priority(ring, mqd);
4573         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4574
4575         /* map_queues packet doesn't need activate the queue,
4576          * so only kiq need set this field.
4577          */
4578         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4579                 mqd->cp_hqd_active = 1;
4580
4581         return 0;
4582 }
4583
4584 static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4585                         struct vi_mqd *mqd)
4586 {
4587         uint32_t mqd_reg;
4588         uint32_t *mqd_data;
4589
4590         /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4591         mqd_data = &mqd->cp_mqd_base_addr_lo;
4592
4593         /* disable wptr polling */
4594         WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4595
4596         /* program all HQD registers */
4597         for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4598                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4599
4600         /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4601          * This is safe since EOP RPTR==WPTR for any inactive HQD
4602          * on ASICs that do not support context-save.
4603          * EOP writes/reads can start anywhere in the ring.
4604          */
4605         if (adev->asic_type != CHIP_TONGA) {
4606                 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4607                 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4608                 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4609         }
4610
4611         for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4612                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4613
4614         /* activate the HQD */
4615         for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4616                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4617
4618         return 0;
4619 }
4620
4621 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4622 {
4623         struct amdgpu_device *adev = ring->adev;
4624         struct vi_mqd *mqd = ring->mqd_ptr;
4625         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4626
4627         gfx_v8_0_kiq_setting(ring);
4628
4629         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4630                 /* reset MQD to a clean status */
4631                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4632                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4633
4634                 /* reset ring buffer */
4635                 ring->wptr = 0;
4636                 amdgpu_ring_clear_ring(ring);
4637                 mutex_lock(&adev->srbm_mutex);
4638                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4639                 gfx_v8_0_mqd_commit(adev, mqd);
4640                 vi_srbm_select(adev, 0, 0, 0, 0);
4641                 mutex_unlock(&adev->srbm_mutex);
4642         } else {
4643                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4644                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4645                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4646                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4647                         amdgpu_ring_clear_ring(ring);
4648                 mutex_lock(&adev->srbm_mutex);
4649                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4650                 gfx_v8_0_mqd_init(ring);
4651                 gfx_v8_0_mqd_commit(adev, mqd);
4652                 vi_srbm_select(adev, 0, 0, 0, 0);
4653                 mutex_unlock(&adev->srbm_mutex);
4654
4655                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4656                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4657         }
4658
4659         return 0;
4660 }
4661
4662 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4663 {
4664         struct amdgpu_device *adev = ring->adev;
4665         struct vi_mqd *mqd = ring->mqd_ptr;
4666         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4667
4668         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4669                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4670                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4671                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4672                 mutex_lock(&adev->srbm_mutex);
4673                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4674                 gfx_v8_0_mqd_init(ring);
4675                 vi_srbm_select(adev, 0, 0, 0, 0);
4676                 mutex_unlock(&adev->srbm_mutex);
4677
4678                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4679                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4680         } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4681                 /* reset MQD to a clean status */
4682                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4683                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4684                 /* reset ring buffer */
4685                 ring->wptr = 0;
4686                 amdgpu_ring_clear_ring(ring);
4687         } else {
4688                 amdgpu_ring_clear_ring(ring);
4689         }
4690         return 0;
4691 }
4692
4693 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4694 {
4695         if (adev->asic_type > CHIP_TONGA) {
4696                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4697                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4698         }
4699         /* enable doorbells */
4700         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4701 }
4702
4703 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4704 {
4705         struct amdgpu_ring *ring;
4706         int r;
4707
4708         ring = &adev->gfx.kiq.ring;
4709
4710         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4711         if (unlikely(r != 0))
4712                 return r;
4713
4714         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4715         if (unlikely(r != 0))
4716                 return r;
4717
4718         gfx_v8_0_kiq_init_queue(ring);
4719         amdgpu_bo_kunmap(ring->mqd_obj);
4720         ring->mqd_ptr = NULL;
4721         amdgpu_bo_unreserve(ring->mqd_obj);
4722         ring->sched.ready = true;
4723         return 0;
4724 }
4725
4726 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4727 {
4728         struct amdgpu_ring *ring = NULL;
4729         int r = 0, i;
4730
4731         gfx_v8_0_cp_compute_enable(adev, true);
4732
4733         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4734                 ring = &adev->gfx.compute_ring[i];
4735
4736                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4737                 if (unlikely(r != 0))
4738                         goto done;
4739                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4740                 if (!r) {
4741                         r = gfx_v8_0_kcq_init_queue(ring);
4742                         amdgpu_bo_kunmap(ring->mqd_obj);
4743                         ring->mqd_ptr = NULL;
4744                 }
4745                 amdgpu_bo_unreserve(ring->mqd_obj);
4746                 if (r)
4747                         goto done;
4748         }
4749
4750         gfx_v8_0_set_mec_doorbell_range(adev);
4751
4752         r = gfx_v8_0_kiq_kcq_enable(adev);
4753         if (r)
4754                 goto done;
4755
4756 done:
4757         return r;
4758 }
4759
4760 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4761 {
4762         int r, i;
4763         struct amdgpu_ring *ring;
4764
4765         /* collect all the ring_tests here, gfx, kiq, compute */
4766         ring = &adev->gfx.gfx_ring[0];
4767         r = amdgpu_ring_test_helper(ring);
4768         if (r)
4769                 return r;
4770
4771         ring = &adev->gfx.kiq.ring;
4772         r = amdgpu_ring_test_helper(ring);
4773         if (r)
4774                 return r;
4775
4776         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4777                 ring = &adev->gfx.compute_ring[i];
4778                 amdgpu_ring_test_helper(ring);
4779         }
4780
4781         return 0;
4782 }
4783
4784 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4785 {
4786         int r;
4787
4788         if (!(adev->flags & AMD_IS_APU))
4789                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4790
4791         r = gfx_v8_0_kiq_resume(adev);
4792         if (r)
4793                 return r;
4794
4795         r = gfx_v8_0_cp_gfx_resume(adev);
4796         if (r)
4797                 return r;
4798
4799         r = gfx_v8_0_kcq_resume(adev);
4800         if (r)
4801                 return r;
4802
4803         r = gfx_v8_0_cp_test_all_rings(adev);
4804         if (r)
4805                 return r;
4806
4807         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4808
4809         return 0;
4810 }
4811
4812 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4813 {
4814         gfx_v8_0_cp_gfx_enable(adev, enable);
4815         gfx_v8_0_cp_compute_enable(adev, enable);
4816 }
4817
4818 static int gfx_v8_0_hw_init(void *handle)
4819 {
4820         int r;
4821         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4822
4823         gfx_v8_0_init_golden_registers(adev);
4824         gfx_v8_0_constants_init(adev);
4825
4826         r = adev->gfx.rlc.funcs->resume(adev);
4827         if (r)
4828                 return r;
4829
4830         r = gfx_v8_0_cp_resume(adev);
4831
4832         return r;
4833 }
4834
4835 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4836 {
4837         int r, i;
4838         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4839
4840         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4841         if (r)
4842                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4843
4844         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4845                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4846
4847                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4848                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4849                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4850                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4851                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4852                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4853                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4854                 amdgpu_ring_write(kiq_ring, 0);
4855                 amdgpu_ring_write(kiq_ring, 0);
4856                 amdgpu_ring_write(kiq_ring, 0);
4857         }
4858         r = amdgpu_ring_test_helper(kiq_ring);
4859         if (r)
4860                 DRM_ERROR("KCQ disable failed\n");
4861
4862         return r;
4863 }
4864
4865 static bool gfx_v8_0_is_idle(void *handle)
4866 {
4867         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4868
4869         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4870                 || RREG32(mmGRBM_STATUS2) != 0x8)
4871                 return false;
4872         else
4873                 return true;
4874 }
4875
4876 static bool gfx_v8_0_rlc_is_idle(void *handle)
4877 {
4878         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4879
4880         if (RREG32(mmGRBM_STATUS2) != 0x8)
4881                 return false;
4882         else
4883                 return true;
4884 }
4885
4886 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4887 {
4888         unsigned int i;
4889         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4890
4891         for (i = 0; i < adev->usec_timeout; i++) {
4892                 if (gfx_v8_0_rlc_is_idle(handle))
4893                         return 0;
4894
4895                 udelay(1);
4896         }
4897         return -ETIMEDOUT;
4898 }
4899
4900 static int gfx_v8_0_wait_for_idle(void *handle)
4901 {
4902         unsigned int i;
4903         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4904
4905         for (i = 0; i < adev->usec_timeout; i++) {
4906                 if (gfx_v8_0_is_idle(handle))
4907                         return 0;
4908
4909                 udelay(1);
4910         }
4911         return -ETIMEDOUT;
4912 }
4913
4914 static int gfx_v8_0_hw_fini(void *handle)
4915 {
4916         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4917
4918         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4919         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4920
4921         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4922
4923         amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4924
4925         /* disable KCQ to avoid CPC touch memory not valid anymore */
4926         gfx_v8_0_kcq_disable(adev);
4927
4928         if (amdgpu_sriov_vf(adev)) {
4929                 pr_debug("For SRIOV client, shouldn't do anything.\n");
4930                 return 0;
4931         }
4932         amdgpu_gfx_rlc_enter_safe_mode(adev);
4933         if (!gfx_v8_0_wait_for_idle(adev))
4934                 gfx_v8_0_cp_enable(adev, false);
4935         else
4936                 pr_err("cp is busy, skip halt cp\n");
4937         if (!gfx_v8_0_wait_for_rlc_idle(adev))
4938                 adev->gfx.rlc.funcs->stop(adev);
4939         else
4940                 pr_err("rlc is busy, skip halt rlc\n");
4941         amdgpu_gfx_rlc_exit_safe_mode(adev);
4942
4943         return 0;
4944 }
4945
/* Suspend is the same operation as hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	int ret = gfx_v8_0_hw_fini(handle);

	return ret;
}
4950
/* Resume is the same operation as hardware init. */
static int gfx_v8_0_resume(void *handle)
{
	int ret = gfx_v8_0_hw_init(handle);

	return ret;
}
4955
4956 static bool gfx_v8_0_check_soft_reset(void *handle)
4957 {
4958         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4959         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4960         u32 tmp;
4961
4962         /* GRBM_STATUS */
4963         tmp = RREG32(mmGRBM_STATUS);
4964         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4965                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4966                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4967                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4968                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4969                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4970                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4971                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4972                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4973                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4974                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4975                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4976                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4977         }
4978
4979         /* GRBM_STATUS2 */
4980         tmp = RREG32(mmGRBM_STATUS2);
4981         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4982                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4983                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4984
4985         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4986             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4987             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4988                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4989                                                 SOFT_RESET_CPF, 1);
4990                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4991                                                 SOFT_RESET_CPC, 1);
4992                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4993                                                 SOFT_RESET_CPG, 1);
4994                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4995                                                 SOFT_RESET_GRBM, 1);
4996         }
4997
4998         /* SRBM_STATUS */
4999         tmp = RREG32(mmSRBM_STATUS);
5000         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5001                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5002                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5003         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5004                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5005                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5006
5007         if (grbm_soft_reset || srbm_soft_reset) {
5008                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5009                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5010                 return true;
5011         } else {
5012                 adev->gfx.grbm_soft_reset = 0;
5013                 adev->gfx.srbm_soft_reset = 0;
5014                 return false;
5015         }
5016 }
5017
5018 static int gfx_v8_0_pre_soft_reset(void *handle)
5019 {
5020         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5021         u32 grbm_soft_reset = 0;
5022
5023         if ((!adev->gfx.grbm_soft_reset) &&
5024             (!adev->gfx.srbm_soft_reset))
5025                 return 0;
5026
5027         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5028
5029         /* stop the rlc */
5030         adev->gfx.rlc.funcs->stop(adev);
5031
5032         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5033             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5034                 /* Disable GFX parsing/prefetching */
5035                 gfx_v8_0_cp_gfx_enable(adev, false);
5036
5037         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5038             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5039             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5040             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5041                 int i;
5042
5043                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5044                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5045
5046                         mutex_lock(&adev->srbm_mutex);
5047                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5048                         gfx_v8_0_deactivate_hqd(adev, 2);
5049                         vi_srbm_select(adev, 0, 0, 0, 0);
5050                         mutex_unlock(&adev->srbm_mutex);
5051                 }
5052                 /* Disable MEC parsing/prefetching */
5053                 gfx_v8_0_cp_compute_enable(adev, false);
5054         }
5055
5056         return 0;
5057 }
5058
5059 static int gfx_v8_0_soft_reset(void *handle)
5060 {
5061         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5062         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5063         u32 tmp;
5064
5065         if ((!adev->gfx.grbm_soft_reset) &&
5066             (!adev->gfx.srbm_soft_reset))
5067                 return 0;
5068
5069         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5070         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5071
5072         if (grbm_soft_reset || srbm_soft_reset) {
5073                 tmp = RREG32(mmGMCON_DEBUG);
5074                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5075                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5076                 WREG32(mmGMCON_DEBUG, tmp);
5077                 udelay(50);
5078         }
5079
5080         if (grbm_soft_reset) {
5081                 tmp = RREG32(mmGRBM_SOFT_RESET);
5082                 tmp |= grbm_soft_reset;
5083                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5084                 WREG32(mmGRBM_SOFT_RESET, tmp);
5085                 tmp = RREG32(mmGRBM_SOFT_RESET);
5086
5087                 udelay(50);
5088
5089                 tmp &= ~grbm_soft_reset;
5090                 WREG32(mmGRBM_SOFT_RESET, tmp);
5091                 tmp = RREG32(mmGRBM_SOFT_RESET);
5092         }
5093
5094         if (srbm_soft_reset) {
5095                 tmp = RREG32(mmSRBM_SOFT_RESET);
5096                 tmp |= srbm_soft_reset;
5097                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5098                 WREG32(mmSRBM_SOFT_RESET, tmp);
5099                 tmp = RREG32(mmSRBM_SOFT_RESET);
5100
5101                 udelay(50);
5102
5103                 tmp &= ~srbm_soft_reset;
5104                 WREG32(mmSRBM_SOFT_RESET, tmp);
5105                 tmp = RREG32(mmSRBM_SOFT_RESET);
5106         }
5107
5108         if (grbm_soft_reset || srbm_soft_reset) {
5109                 tmp = RREG32(mmGMCON_DEBUG);
5110                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5111                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5112                 WREG32(mmGMCON_DEBUG, tmp);
5113         }
5114
5115         /* Wait a little for things to settle down */
5116         udelay(50);
5117
5118         return 0;
5119 }
5120
5121 static int gfx_v8_0_post_soft_reset(void *handle)
5122 {
5123         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5124         u32 grbm_soft_reset = 0;
5125
5126         if ((!adev->gfx.grbm_soft_reset) &&
5127             (!adev->gfx.srbm_soft_reset))
5128                 return 0;
5129
5130         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5131
5132         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5133             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5134             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5135             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5136                 int i;
5137
5138                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5139                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5140
5141                         mutex_lock(&adev->srbm_mutex);
5142                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5143                         gfx_v8_0_deactivate_hqd(adev, 2);
5144                         vi_srbm_select(adev, 0, 0, 0, 0);
5145                         mutex_unlock(&adev->srbm_mutex);
5146                 }
5147                 gfx_v8_0_kiq_resume(adev);
5148                 gfx_v8_0_kcq_resume(adev);
5149         }
5150
5151         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5152             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5153                 gfx_v8_0_cp_gfx_resume(adev);
5154
5155         gfx_v8_0_cp_test_all_rings(adev);
5156
5157         adev->gfx.rlc.funcs->start(adev);
5158
5159         return 0;
5160 }
5161
5162 /**
5163  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5164  *
5165  * @adev: amdgpu_device pointer
5166  *
5167  * Fetches a GPU clock counter snapshot.
5168  * Returns the 64 bit clock counter snapshot.
5169  */
5170 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5171 {
5172         uint64_t clock;
5173
5174         mutex_lock(&adev->gfx.gpu_clock_mutex);
5175         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5176         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5177                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5178         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5179         return clock;
5180 }
5181
5182 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5183                                           uint32_t vmid,
5184                                           uint32_t gds_base, uint32_t gds_size,
5185                                           uint32_t gws_base, uint32_t gws_size,
5186                                           uint32_t oa_base, uint32_t oa_size)
5187 {
5188         /* GDS Base */
5189         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5190         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5191                                 WRITE_DATA_DST_SEL(0)));
5192         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5193         amdgpu_ring_write(ring, 0);
5194         amdgpu_ring_write(ring, gds_base);
5195
5196         /* GDS Size */
5197         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5198         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5199                                 WRITE_DATA_DST_SEL(0)));
5200         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5201         amdgpu_ring_write(ring, 0);
5202         amdgpu_ring_write(ring, gds_size);
5203
5204         /* GWS */
5205         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5206         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5207                                 WRITE_DATA_DST_SEL(0)));
5208         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5209         amdgpu_ring_write(ring, 0);
5210         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5211
5212         /* OA */
5213         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5214         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5215                                 WRITE_DATA_DST_SEL(0)));
5216         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5217         amdgpu_ring_write(ring, 0);
5218         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5219 }
5220
5221 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5222 {
5223         WREG32(mmSQ_IND_INDEX,
5224                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5225                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5226                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5227                 (SQ_IND_INDEX__FORCE_READ_MASK));
5228         return RREG32(mmSQ_IND_DATA);
5229 }
5230
5231 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5232                            uint32_t wave, uint32_t thread,
5233                            uint32_t regno, uint32_t num, uint32_t *out)
5234 {
5235         WREG32(mmSQ_IND_INDEX,
5236                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5237                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5238                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5239                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5240                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5241                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5242         while (num--)
5243                 *(out++) = RREG32(mmSQ_IND_DATA);
5244 }
5245
5246 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5247 {
5248         /* type 0 wave data */
5249         dst[(*no_fields)++] = 0;
5250         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5251         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5252         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5253         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5254         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5255         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5256         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5257         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5258         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5259         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5260         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5261         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5262         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5263         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5264         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5265         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5266         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5267         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5268         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
5269 }
5270
5271 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5272                                      uint32_t wave, uint32_t start,
5273                                      uint32_t size, uint32_t *dst)
5274 {
5275         wave_read_regs(
5276                 adev, simd, wave, 0,
5277                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5278 }
5279
5280
5281 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5282         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5283         .select_se_sh = &gfx_v8_0_select_se_sh,
5284         .read_wave_data = &gfx_v8_0_read_wave_data,
5285         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5286         .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5287 };
5288
5289 static int gfx_v8_0_early_init(void *handle)
5290 {
5291         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5292
5293         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5294         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5295                                           AMDGPU_MAX_COMPUTE_RINGS);
5296         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5297         gfx_v8_0_set_ring_funcs(adev);
5298         gfx_v8_0_set_irq_funcs(adev);
5299         gfx_v8_0_set_gds_init(adev);
5300         gfx_v8_0_set_rlc_funcs(adev);
5301
5302         return 0;
5303 }
5304
5305 static int gfx_v8_0_late_init(void *handle)
5306 {
5307         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5308         int r;
5309
5310         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5311         if (r)
5312                 return r;
5313
5314         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5315         if (r)
5316                 return r;
5317
5318         /* requires IBs so do in late init after IB pool is initialized */
5319         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5320         if (r)
5321                 return r;
5322
5323         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5324         if (r) {
5325                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5326                 return r;
5327         }
5328
5329         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5330         if (r) {
5331                 DRM_ERROR(
5332                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5333                         r);
5334                 return r;
5335         }
5336
5337         return 0;
5338 }
5339
5340 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5341                                                        bool enable)
5342 {
5343         if ((adev->asic_type == CHIP_POLARIS11) ||
5344             (adev->asic_type == CHIP_POLARIS12) ||
5345             (adev->asic_type == CHIP_VEGAM))
5346                 /* Send msg to SMU via Powerplay */
5347                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5348
5349         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5350 }
5351
5352 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5353                                                         bool enable)
5354 {
5355         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5356 }
5357
5358 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5359                 bool enable)
5360 {
5361         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5362 }
5363
5364 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5365                                           bool enable)
5366 {
5367         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5368 }
5369
5370 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5371                                                 bool enable)
5372 {
5373         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5374
5375         /* Read any GFX register to wake up GFX. */
5376         if (!enable)
5377                 RREG32(mmDB_RENDER_CONTROL);
5378 }
5379
5380 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5381                                           bool enable)
5382 {
5383         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5384                 cz_enable_gfx_cg_power_gating(adev, true);
5385                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5386                         cz_enable_gfx_pipeline_power_gating(adev, true);
5387         } else {
5388                 cz_enable_gfx_cg_power_gating(adev, false);
5389                 cz_enable_gfx_pipeline_power_gating(adev, false);
5390         }
5391 }
5392
5393 static int gfx_v8_0_set_powergating_state(void *handle,
5394                                           enum amd_powergating_state state)
5395 {
5396         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5397         bool enable = (state == AMD_PG_STATE_GATE);
5398
5399         if (amdgpu_sriov_vf(adev))
5400                 return 0;
5401
5402         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5403                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5404                                 AMD_PG_SUPPORT_CP |
5405                                 AMD_PG_SUPPORT_GFX_DMG))
5406                 amdgpu_gfx_rlc_enter_safe_mode(adev);
5407         switch (adev->asic_type) {
5408         case CHIP_CARRIZO:
5409         case CHIP_STONEY:
5410
5411                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5412                         cz_enable_sck_slow_down_on_power_up(adev, true);
5413                         cz_enable_sck_slow_down_on_power_down(adev, true);
5414                 } else {
5415                         cz_enable_sck_slow_down_on_power_up(adev, false);
5416                         cz_enable_sck_slow_down_on_power_down(adev, false);
5417                 }
5418                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5419                         cz_enable_cp_power_gating(adev, true);
5420                 else
5421                         cz_enable_cp_power_gating(adev, false);
5422
5423                 cz_update_gfx_cg_power_gating(adev, enable);
5424
5425                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5426                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5427                 else
5428                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5429
5430                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5431                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5432                 else
5433                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5434                 break;
5435         case CHIP_POLARIS11:
5436         case CHIP_POLARIS12:
5437         case CHIP_VEGAM:
5438                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5439                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5440                 else
5441                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5442
5443                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5444                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5445                 else
5446                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5447
5448                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5449                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5450                 else
5451                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5452                 break;
5453         default:
5454                 break;
5455         }
5456         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5457                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5458                                 AMD_PG_SUPPORT_CP |
5459                                 AMD_PG_SUPPORT_GFX_DMG))
5460                 amdgpu_gfx_rlc_exit_safe_mode(adev);
5461         return 0;
5462 }
5463
5464 static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
5465 {
5466         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5467         int data;
5468
5469         if (amdgpu_sriov_vf(adev))
5470                 *flags = 0;
5471
5472         /* AMD_CG_SUPPORT_GFX_MGCG */
5473         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5474         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5475                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5476
5477         /* AMD_CG_SUPPORT_GFX_CGLG */
5478         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5479         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5480                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5481
5482         /* AMD_CG_SUPPORT_GFX_CGLS */
5483         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5484                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5485
5486         /* AMD_CG_SUPPORT_GFX_CGTS */
5487         data = RREG32(mmCGTS_SM_CTRL_REG);
5488         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5489                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5490
5491         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5492         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5493                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5494
5495         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5496         data = RREG32(mmRLC_MEM_SLP_CNTL);
5497         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5498                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5499
5500         /* AMD_CG_SUPPORT_GFX_CP_LS */
5501         data = RREG32(mmCP_MEM_SLP_CNTL);
5502         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5503                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5504 }
5505
5506 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5507                                      uint32_t reg_addr, uint32_t cmd)
5508 {
5509         uint32_t data;
5510
5511         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5512
5513         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5514         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5515
5516         data = RREG32(mmRLC_SERDES_WR_CTRL);
5517         if (adev->asic_type == CHIP_STONEY)
5518                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5519                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5520                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5521                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5522                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5523                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5524                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5525                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5526                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5527         else
5528                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5529                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5530                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5531                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5532                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5533                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5534                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5535                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5536                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5537                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5538                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5539         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5540                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5541                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5542                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5543
5544         WREG32(mmRLC_SERDES_WR_CTRL, data);
5545 }
5546
/*
 * RLC safe-mode message codes and the RLC_GPR_REG2 field layout:
 * REQ is bit 0, MESSAGE is the 4-bit field in bits 4:1.
 * NOTE(review): the safe-mode helpers right below do not reference these;
 * confirm whether they are still used elsewhere in the file.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5553
5554 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5555 {
5556         uint32_t rlc_setting;
5557
5558         rlc_setting = RREG32(mmRLC_CNTL);
5559         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5560                 return false;
5561
5562         return true;
5563 }
5564
5565 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5566 {
5567         uint32_t data;
5568         unsigned i;
5569         data = RREG32(mmRLC_CNTL);
5570         data |= RLC_SAFE_MODE__CMD_MASK;
5571         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5572         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5573         WREG32(mmRLC_SAFE_MODE, data);
5574
5575         /* wait for RLC_SAFE_MODE */
5576         for (i = 0; i < adev->usec_timeout; i++) {
5577                 if ((RREG32(mmRLC_GPM_STAT) &
5578                      (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5579                       RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5580                     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5581                      RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5582                         break;
5583                 udelay(1);
5584         }
5585         for (i = 0; i < adev->usec_timeout; i++) {
5586                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5587                         break;
5588                 udelay(1);
5589         }
5590 }
5591
5592 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5593 {
5594         uint32_t data;
5595         unsigned i;
5596
5597         data = RREG32(mmRLC_CNTL);
5598         data |= RLC_SAFE_MODE__CMD_MASK;
5599         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5600         WREG32(mmRLC_SAFE_MODE, data);
5601
5602         for (i = 0; i < adev->usec_timeout; i++) {
5603                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5604                         break;
5605                 udelay(1);
5606         }
5607 }
5608
5609 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5610 {
5611         u32 data;
5612
5613         amdgpu_gfx_off_ctrl(adev, false);
5614
5615         if (amdgpu_sriov_is_pp_one_vf(adev))
5616                 data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5617         else
5618                 data = RREG32(mmRLC_SPM_VMID);
5619
5620         data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5621         data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5622
5623         if (amdgpu_sriov_is_pp_one_vf(adev))
5624                 WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5625         else
5626                 WREG32(mmRLC_SPM_VMID, data);
5627
5628         amdgpu_gfx_off_ctrl(adev, true);
5629 }
5630
5631 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5632         .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5633         .set_safe_mode = gfx_v8_0_set_safe_mode,
5634         .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5635         .init = gfx_v8_0_rlc_init,
5636         .get_csb_size = gfx_v8_0_get_csb_size,
5637         .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5638         .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5639         .resume = gfx_v8_0_rlc_resume,
5640         .stop = gfx_v8_0_rlc_stop,
5641         .reset = gfx_v8_0_rlc_reset,
5642         .start = gfx_v8_0_rlc_start,
5643         .update_spm_vmid = gfx_v8_0_update_spm_vmid
5644 };
5645
5646 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5647                                                       bool enable)
5648 {
5649         uint32_t temp, data;
5650
5651         amdgpu_gfx_rlc_enter_safe_mode(adev);
5652
5653         /* It is disabled by HW by default */
5654         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5655                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5656                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5657                                 /* 1 - RLC memory Light sleep */
5658                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5659
5660                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5661                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5662                 }
5663
5664                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5665                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5666                 if (adev->flags & AMD_IS_APU)
5667                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5668                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5669                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5670                 else
5671                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5672                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5673                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5674                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5675
5676                 if (temp != data)
5677                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5678
5679                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5680                 gfx_v8_0_wait_for_rlc_serdes(adev);
5681
5682                 /* 5 - clear mgcg override */
5683                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5684
5685                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5686                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5687                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5688                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5689                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5690                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5691                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5692                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5693                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5694                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5695                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5696                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5697                         if (temp != data)
5698                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5699                 }
5700                 udelay(50);
5701
5702                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5703                 gfx_v8_0_wait_for_rlc_serdes(adev);
5704         } else {
5705                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5706                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5707                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5708                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5709                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5710                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5711                 if (temp != data)
5712                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5713
5714                 /* 2 - disable MGLS in RLC */
5715                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5716                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5717                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5718                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5719                 }
5720
5721                 /* 3 - disable MGLS in CP */
5722                 data = RREG32(mmCP_MEM_SLP_CNTL);
5723                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5724                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5725                         WREG32(mmCP_MEM_SLP_CNTL, data);
5726                 }
5727
5728                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5729                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5730                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5731                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5732                 if (temp != data)
5733                         WREG32(mmCGTS_SM_CTRL_REG, data);
5734
5735                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5736                 gfx_v8_0_wait_for_rlc_serdes(adev);
5737
5738                 /* 6 - set mgcg override */
5739                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5740
5741                 udelay(50);
5742
5743                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5744                 gfx_v8_0_wait_for_rlc_serdes(adev);
5745         }
5746
5747         amdgpu_gfx_rlc_exit_safe_mode(adev);
5748 }
5749
5750 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5751                                                       bool enable)
5752 {
5753         uint32_t temp, temp1, data, data1;
5754
5755         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5756
5757         amdgpu_gfx_rlc_enter_safe_mode(adev);
5758
5759         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5760                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5761                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5762                 if (temp1 != data1)
5763                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5764
5765                 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5766                 gfx_v8_0_wait_for_rlc_serdes(adev);
5767
5768                 /* 2 - clear cgcg override */
5769                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5770
5771                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5772                 gfx_v8_0_wait_for_rlc_serdes(adev);
5773
5774                 /* 3 - write cmd to set CGLS */
5775                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5776
5777                 /* 4 - enable cgcg */
5778                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5779
5780                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5781                         /* enable cgls*/
5782                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5783
5784                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5785                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5786
5787                         if (temp1 != data1)
5788                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5789                 } else {
5790                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5791                 }
5792
5793                 if (temp != data)
5794                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5795
5796                 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5797                  * Cmp_busy/GFX_Idle interrupts
5798                  */
5799                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5800         } else {
5801                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5802                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5803
5804                 /* TEST CGCG */
5805                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5806                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5807                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5808                 if (temp1 != data1)
5809                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5810
5811                 /* read gfx register to wake up cgcg */
5812                 RREG32(mmCB_CGTT_SCLK_CTRL);
5813                 RREG32(mmCB_CGTT_SCLK_CTRL);
5814                 RREG32(mmCB_CGTT_SCLK_CTRL);
5815                 RREG32(mmCB_CGTT_SCLK_CTRL);
5816
5817                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5818                 gfx_v8_0_wait_for_rlc_serdes(adev);
5819
5820                 /* write cmd to Set CGCG Override */
5821                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5822
5823                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5824                 gfx_v8_0_wait_for_rlc_serdes(adev);
5825
5826                 /* write cmd to Clear CGLS */
5827                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5828
5829                 /* disable cgcg, cgls should be disabled too. */
5830                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5831                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5832                 if (temp != data)
5833                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5834                 /* enable interrupts again for PG */
5835                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5836         }
5837
5838         gfx_v8_0_wait_for_rlc_serdes(adev);
5839
5840         amdgpu_gfx_rlc_exit_safe_mode(adev);
5841 }
5842 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5843                                             bool enable)
5844 {
5845         if (enable) {
5846                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5847                  * ===  MGCG + MGLS + TS(CG/LS) ===
5848                  */
5849                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5850                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5851         } else {
5852                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5853                  * ===  CGCG + CGLS ===
5854                  */
5855                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5856                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5857         }
5858         return 0;
5859 }
5860
5861 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5862                                           enum amd_clockgating_state state)
5863 {
5864         uint32_t msg_id, pp_state = 0;
5865         uint32_t pp_support_state = 0;
5866
5867         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5868                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5869                         pp_support_state = PP_STATE_SUPPORT_LS;
5870                         pp_state = PP_STATE_LS;
5871                 }
5872                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5873                         pp_support_state |= PP_STATE_SUPPORT_CG;
5874                         pp_state |= PP_STATE_CG;
5875                 }
5876                 if (state == AMD_CG_STATE_UNGATE)
5877                         pp_state = 0;
5878
5879                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5880                                 PP_BLOCK_GFX_CG,
5881                                 pp_support_state,
5882                                 pp_state);
5883                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5884         }
5885
5886         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5887                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5888                         pp_support_state = PP_STATE_SUPPORT_LS;
5889                         pp_state = PP_STATE_LS;
5890                 }
5891
5892                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5893                         pp_support_state |= PP_STATE_SUPPORT_CG;
5894                         pp_state |= PP_STATE_CG;
5895                 }
5896
5897                 if (state == AMD_CG_STATE_UNGATE)
5898                         pp_state = 0;
5899
5900                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5901                                 PP_BLOCK_GFX_MG,
5902                                 pp_support_state,
5903                                 pp_state);
5904                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5905         }
5906
5907         return 0;
5908 }
5909
5910 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5911                                           enum amd_clockgating_state state)
5912 {
5913
5914         uint32_t msg_id, pp_state = 0;
5915         uint32_t pp_support_state = 0;
5916
5917         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5918                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5919                         pp_support_state = PP_STATE_SUPPORT_LS;
5920                         pp_state = PP_STATE_LS;
5921                 }
5922                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5923                         pp_support_state |= PP_STATE_SUPPORT_CG;
5924                         pp_state |= PP_STATE_CG;
5925                 }
5926                 if (state == AMD_CG_STATE_UNGATE)
5927                         pp_state = 0;
5928
5929                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5930                                 PP_BLOCK_GFX_CG,
5931                                 pp_support_state,
5932                                 pp_state);
5933                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5934         }
5935
5936         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5937                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5938                         pp_support_state = PP_STATE_SUPPORT_LS;
5939                         pp_state = PP_STATE_LS;
5940                 }
5941                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5942                         pp_support_state |= PP_STATE_SUPPORT_CG;
5943                         pp_state |= PP_STATE_CG;
5944                 }
5945                 if (state == AMD_CG_STATE_UNGATE)
5946                         pp_state = 0;
5947
5948                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5949                                 PP_BLOCK_GFX_3D,
5950                                 pp_support_state,
5951                                 pp_state);
5952                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5953         }
5954
5955         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5956                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5957                         pp_support_state = PP_STATE_SUPPORT_LS;
5958                         pp_state = PP_STATE_LS;
5959                 }
5960
5961                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5962                         pp_support_state |= PP_STATE_SUPPORT_CG;
5963                         pp_state |= PP_STATE_CG;
5964                 }
5965
5966                 if (state == AMD_CG_STATE_UNGATE)
5967                         pp_state = 0;
5968
5969                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5970                                 PP_BLOCK_GFX_MG,
5971                                 pp_support_state,
5972                                 pp_state);
5973                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5974         }
5975
5976         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5977                 pp_support_state = PP_STATE_SUPPORT_LS;
5978
5979                 if (state == AMD_CG_STATE_UNGATE)
5980                         pp_state = 0;
5981                 else
5982                         pp_state = PP_STATE_LS;
5983
5984                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5985                                 PP_BLOCK_GFX_RLC,
5986                                 pp_support_state,
5987                                 pp_state);
5988                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5989         }
5990
5991         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5992                 pp_support_state = PP_STATE_SUPPORT_LS;
5993
5994                 if (state == AMD_CG_STATE_UNGATE)
5995                         pp_state = 0;
5996                 else
5997                         pp_state = PP_STATE_LS;
5998                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5999                         PP_BLOCK_GFX_CP,
6000                         pp_support_state,
6001                         pp_state);
6002                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6003         }
6004
6005         return 0;
6006 }
6007
6008 static int gfx_v8_0_set_clockgating_state(void *handle,
6009                                           enum amd_clockgating_state state)
6010 {
6011         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6012
6013         if (amdgpu_sriov_vf(adev))
6014                 return 0;
6015
6016         switch (adev->asic_type) {
6017         case CHIP_FIJI:
6018         case CHIP_CARRIZO:
6019         case CHIP_STONEY:
6020                 gfx_v8_0_update_gfx_clock_gating(adev,
6021                                                  state == AMD_CG_STATE_GATE);
6022                 break;
6023         case CHIP_TONGA:
6024                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6025                 break;
6026         case CHIP_POLARIS10:
6027         case CHIP_POLARIS11:
6028         case CHIP_POLARIS12:
6029         case CHIP_VEGAM:
6030                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6031                 break;
6032         default:
6033                 break;
6034         }
6035         return 0;
6036 }
6037
6038 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6039 {
6040         return *ring->rptr_cpu_addr;
6041 }
6042
6043 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6044 {
6045         struct amdgpu_device *adev = ring->adev;
6046
6047         if (ring->use_doorbell)
6048                 /* XXX check if swapping is necessary on BE */
6049                 return *ring->wptr_cpu_addr;
6050         else
6051                 return RREG32(mmCP_RB0_WPTR);
6052 }
6053
6054 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6055 {
6056         struct amdgpu_device *adev = ring->adev;
6057
6058         if (ring->use_doorbell) {
6059                 /* XXX check if swapping is necessary on BE */
6060                 *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6061                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6062         } else {
6063                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6064                 (void)RREG32(mmCP_RB0_WPTR);
6065         }
6066 }
6067
6068 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6069 {
6070         u32 ref_and_mask, reg_mem_engine;
6071
6072         if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6073             (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6074                 switch (ring->me) {
6075                 case 1:
6076                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6077                         break;
6078                 case 2:
6079                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6080                         break;
6081                 default:
6082                         return;
6083                 }
6084                 reg_mem_engine = 0;
6085         } else {
6086                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6087                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6088         }
6089
6090         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6091         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6092                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6093                                  reg_mem_engine));
6094         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6095         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6096         amdgpu_ring_write(ring, ref_and_mask);
6097         amdgpu_ring_write(ring, ref_and_mask);
6098         amdgpu_ring_write(ring, 0x20); /* poll interval */
6099 }
6100
6101 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6102 {
6103         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6104         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6105                 EVENT_INDEX(4));
6106
6107         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6108         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6109                 EVENT_INDEX(0));
6110 }
6111
6112 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6113                                         struct amdgpu_job *job,
6114                                         struct amdgpu_ib *ib,
6115                                         uint32_t flags)
6116 {
6117         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6118         u32 header, control = 0;
6119
6120         if (ib->flags & AMDGPU_IB_FLAG_CE)
6121                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6122         else
6123                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6124
6125         control |= ib->length_dw | (vmid << 24);
6126
6127         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6128                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6129
6130                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6131                         gfx_v8_0_ring_emit_de_meta(ring);
6132         }
6133
6134         amdgpu_ring_write(ring, header);
6135         amdgpu_ring_write(ring,
6136 #ifdef __BIG_ENDIAN
6137                           (2 << 0) |
6138 #endif
6139                           (ib->gpu_addr & 0xFFFFFFFC));
6140         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6141         amdgpu_ring_write(ring, control);
6142 }
6143
6144 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6145                                           struct amdgpu_job *job,
6146                                           struct amdgpu_ib *ib,
6147                                           uint32_t flags)
6148 {
6149         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6150         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6151
6152         /* Currently, there is a high possibility to get wave ID mismatch
6153          * between ME and GDS, leading to a hw deadlock, because ME generates
6154          * different wave IDs than the GDS expects. This situation happens
6155          * randomly when at least 5 compute pipes use GDS ordered append.
6156          * The wave IDs generated by ME are also wrong after suspend/resume.
6157          * Those are probably bugs somewhere else in the kernel driver.
6158          *
6159          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6160          * GDS to 0 for this ring (me/pipe).
6161          */
6162         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6163                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6164                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6165                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6166         }
6167
6168         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6169         amdgpu_ring_write(ring,
6170 #ifdef __BIG_ENDIAN
6171                                 (2 << 0) |
6172 #endif
6173                                 (ib->gpu_addr & 0xFFFFFFFC));
6174         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6175         amdgpu_ring_write(ring, control);
6176 }
6177
6178 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6179                                          u64 seq, unsigned flags)
6180 {
6181         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6182         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6183
6184         /* Workaround for cache flush problems. First send a dummy EOP
6185          * event down the pipe with seq one below.
6186          */
6187         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6188         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6189                                  EOP_TC_ACTION_EN |
6190                                  EOP_TC_WB_ACTION_EN |
6191                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6192                                  EVENT_INDEX(5)));
6193         amdgpu_ring_write(ring, addr & 0xfffffffc);
6194         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6195                                 DATA_SEL(1) | INT_SEL(0));
6196         amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6197         amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6198
6199         /* Then send the real EOP event down the pipe:
6200          * EVENT_WRITE_EOP - flush caches, send int */
6201         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6202         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6203                                  EOP_TC_ACTION_EN |
6204                                  EOP_TC_WB_ACTION_EN |
6205                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6206                                  EVENT_INDEX(5)));
6207         amdgpu_ring_write(ring, addr & 0xfffffffc);
6208         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6209                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6210         amdgpu_ring_write(ring, lower_32_bits(seq));
6211         amdgpu_ring_write(ring, upper_32_bits(seq));
6212
6213 }
6214
6215 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6216 {
6217         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6218         uint32_t seq = ring->fence_drv.sync_seq;
6219         uint64_t addr = ring->fence_drv.gpu_addr;
6220
6221         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6222         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6223                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6224                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6225         amdgpu_ring_write(ring, addr & 0xfffffffc);
6226         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6227         amdgpu_ring_write(ring, seq);
6228         amdgpu_ring_write(ring, 0xffffffff);
6229         amdgpu_ring_write(ring, 4); /* poll interval */
6230 }
6231
6232 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6233                                         unsigned vmid, uint64_t pd_addr)
6234 {
6235         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6236
6237         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6238
6239         /* wait for the invalidate to complete */
6240         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6241         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6242                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6243                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6244         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6245         amdgpu_ring_write(ring, 0);
6246         amdgpu_ring_write(ring, 0); /* ref */
6247         amdgpu_ring_write(ring, 0); /* mask */
6248         amdgpu_ring_write(ring, 0x20); /* poll interval */
6249
6250         /* compute doesn't have PFP */
6251         if (usepfp) {
6252                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6253                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6254                 amdgpu_ring_write(ring, 0x0);
6255         }
6256 }
6257
6258 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6259 {
6260         return *ring->wptr_cpu_addr;
6261 }
6262
6263 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6264 {
6265         struct amdgpu_device *adev = ring->adev;
6266
6267         /* XXX check if swapping is necessary on BE */
6268         *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6269         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6270 }
6271
6272 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6273                                              u64 addr, u64 seq,
6274                                              unsigned flags)
6275 {
6276         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6277         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6278
6279         /* RELEASE_MEM - flush caches, send int */
6280         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6281         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6282                                  EOP_TC_ACTION_EN |
6283                                  EOP_TC_WB_ACTION_EN |
6284                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6285                                  EVENT_INDEX(5)));
6286         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6287         amdgpu_ring_write(ring, addr & 0xfffffffc);
6288         amdgpu_ring_write(ring, upper_32_bits(addr));
6289         amdgpu_ring_write(ring, lower_32_bits(seq));
6290         amdgpu_ring_write(ring, upper_32_bits(seq));
6291 }
6292
6293 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6294                                          u64 seq, unsigned int flags)
6295 {
6296         /* we only allocate 32bit for each seq wb address */
6297         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6298
6299         /* write fence seq to the "addr" */
6300         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6301         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6302                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6303         amdgpu_ring_write(ring, lower_32_bits(addr));
6304         amdgpu_ring_write(ring, upper_32_bits(addr));
6305         amdgpu_ring_write(ring, lower_32_bits(seq));
6306
6307         if (flags & AMDGPU_FENCE_FLAG_INT) {
6308                 /* set register to trigger INT */
6309                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6310                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6311                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6312                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6313                 amdgpu_ring_write(ring, 0);
6314                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6315         }
6316 }
6317
6318 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6319 {
6320         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6321         amdgpu_ring_write(ring, 0);
6322 }
6323
6324 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6325 {
6326         uint32_t dw2 = 0;
6327
6328         if (amdgpu_sriov_vf(ring->adev))
6329                 gfx_v8_0_ring_emit_ce_meta(ring);
6330
6331         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6332         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6333                 gfx_v8_0_ring_emit_vgt_flush(ring);
6334                 /* set load_global_config & load_global_uconfig */
6335                 dw2 |= 0x8001;
6336                 /* set load_cs_sh_regs */
6337                 dw2 |= 0x01000000;
6338                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6339                 dw2 |= 0x10002;
6340
6341                 /* set load_ce_ram if preamble presented */
6342                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6343                         dw2 |= 0x10000000;
6344         } else {
6345                 /* still load_ce_ram if this is the first time preamble presented
6346                  * although there is no context switch happens.
6347                  */
6348                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6349                         dw2 |= 0x10000000;
6350         }
6351
6352         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6353         amdgpu_ring_write(ring, dw2);
6354         amdgpu_ring_write(ring, 0);
6355 }
6356
6357 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6358 {
6359         unsigned ret;
6360
6361         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6362         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6363         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6364         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6365         ret = ring->wptr & ring->buf_mask;
6366         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6367         return ret;
6368 }
6369
6370 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6371 {
6372         unsigned cur;
6373
6374         BUG_ON(offset > ring->buf_mask);
6375         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6376
6377         cur = (ring->wptr & ring->buf_mask) - 1;
6378         if (likely(cur > offset))
6379                 ring->ring[offset] = cur - offset;
6380         else
6381                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6382 }
6383
6384 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6385                                     uint32_t reg_val_offs)
6386 {
6387         struct amdgpu_device *adev = ring->adev;
6388
6389         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6390         amdgpu_ring_write(ring, 0 |     /* src: register*/
6391                                 (5 << 8) |      /* dst: memory */
6392                                 (1 << 20));     /* write confirm */
6393         amdgpu_ring_write(ring, reg);
6394         amdgpu_ring_write(ring, 0);
6395         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6396                                 reg_val_offs * 4));
6397         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6398                                 reg_val_offs * 4));
6399 }
6400
6401 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6402                                   uint32_t val)
6403 {
6404         uint32_t cmd;
6405
6406         switch (ring->funcs->type) {
6407         case AMDGPU_RING_TYPE_GFX:
6408                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6409                 break;
6410         case AMDGPU_RING_TYPE_KIQ:
6411                 cmd = 1 << 16; /* no inc addr */
6412                 break;
6413         default:
6414                 cmd = WR_CONFIRM;
6415                 break;
6416         }
6417
6418         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6419         amdgpu_ring_write(ring, cmd);
6420         amdgpu_ring_write(ring, reg);
6421         amdgpu_ring_write(ring, 0);
6422         amdgpu_ring_write(ring, val);
6423 }
6424
6425 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6426 {
6427         struct amdgpu_device *adev = ring->adev;
6428         uint32_t value = 0;
6429
6430         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6431         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6432         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6433         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6434         WREG32(mmSQ_CMD, value);
6435 }
6436
6437 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6438                                                  enum amdgpu_interrupt_state state)
6439 {
6440         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6441                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6442 }
6443
6444 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6445                                                      int me, int pipe,
6446                                                      enum amdgpu_interrupt_state state)
6447 {
6448         u32 mec_int_cntl, mec_int_cntl_reg;
6449
6450         /*
6451          * amdgpu controls only the first MEC. That's why this function only
6452          * handles the setting of interrupts for this specific MEC. All other
6453          * pipes' interrupts are set by amdkfd.
6454          */
6455
6456         if (me == 1) {
6457                 switch (pipe) {
6458                 case 0:
6459                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6460                         break;
6461                 case 1:
6462                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6463                         break;
6464                 case 2:
6465                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6466                         break;
6467                 case 3:
6468                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6469                         break;
6470                 default:
6471                         DRM_DEBUG("invalid pipe %d\n", pipe);
6472                         return;
6473                 }
6474         } else {
6475                 DRM_DEBUG("invalid me %d\n", me);
6476                 return;
6477         }
6478
6479         switch (state) {
6480         case AMDGPU_IRQ_STATE_DISABLE:
6481                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6482                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6483                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6484                 break;
6485         case AMDGPU_IRQ_STATE_ENABLE:
6486                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6487                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6488                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6489                 break;
6490         default:
6491                 break;
6492         }
6493 }
6494
6495 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6496                                              struct amdgpu_irq_src *source,
6497                                              unsigned type,
6498                                              enum amdgpu_interrupt_state state)
6499 {
6500         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6501                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6502
6503         return 0;
6504 }
6505
6506 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6507                                               struct amdgpu_irq_src *source,
6508                                               unsigned type,
6509                                               enum amdgpu_interrupt_state state)
6510 {
6511         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6512                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6513
6514         return 0;
6515 }
6516
6517 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6518                                             struct amdgpu_irq_src *src,
6519                                             unsigned type,
6520                                             enum amdgpu_interrupt_state state)
6521 {
6522         switch (type) {
6523         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6524                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6525                 break;
6526         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6527                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6528                 break;
6529         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6530                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6531                 break;
6532         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6533                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6534                 break;
6535         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6536                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6537                 break;
6538         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6539                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6540                 break;
6541         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6542                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6543                 break;
6544         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6545                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6546                 break;
6547         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6548                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6549                 break;
6550         default:
6551                 break;
6552         }
6553         return 0;
6554 }
6555
6556 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6557                                          struct amdgpu_irq_src *source,
6558                                          unsigned int type,
6559                                          enum amdgpu_interrupt_state state)
6560 {
6561         int enable_flag;
6562
6563         switch (state) {
6564         case AMDGPU_IRQ_STATE_DISABLE:
6565                 enable_flag = 0;
6566                 break;
6567
6568         case AMDGPU_IRQ_STATE_ENABLE:
6569                 enable_flag = 1;
6570                 break;
6571
6572         default:
6573                 return -EINVAL;
6574         }
6575
6576         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6577         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6578         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6579         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6580         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6581         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6582                      enable_flag);
6583         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6584                      enable_flag);
6585         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6586                      enable_flag);
6587         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6588                      enable_flag);
6589         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6590                      enable_flag);
6591         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6592                      enable_flag);
6593         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6594                      enable_flag);
6595         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6596                      enable_flag);
6597
6598         return 0;
6599 }
6600
6601 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6602                                      struct amdgpu_irq_src *source,
6603                                      unsigned int type,
6604                                      enum amdgpu_interrupt_state state)
6605 {
6606         int enable_flag;
6607
6608         switch (state) {
6609         case AMDGPU_IRQ_STATE_DISABLE:
6610                 enable_flag = 1;
6611                 break;
6612
6613         case AMDGPU_IRQ_STATE_ENABLE:
6614                 enable_flag = 0;
6615                 break;
6616
6617         default:
6618                 return -EINVAL;
6619         }
6620
6621         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6622                      enable_flag);
6623
6624         return 0;
6625 }
6626
6627 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6628                             struct amdgpu_irq_src *source,
6629                             struct amdgpu_iv_entry *entry)
6630 {
6631         int i;
6632         u8 me_id, pipe_id, queue_id;
6633         struct amdgpu_ring *ring;
6634
6635         DRM_DEBUG("IH: CP EOP\n");
6636         me_id = (entry->ring_id & 0x0c) >> 2;
6637         pipe_id = (entry->ring_id & 0x03) >> 0;
6638         queue_id = (entry->ring_id & 0x70) >> 4;
6639
6640         switch (me_id) {
6641         case 0:
6642                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6643                 break;
6644         case 1:
6645         case 2:
6646                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6647                         ring = &adev->gfx.compute_ring[i];
6648                         /* Per-queue interrupt is supported for MEC starting from VI.
6649                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6650                           */
6651                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6652                                 amdgpu_fence_process(ring);
6653                 }
6654                 break;
6655         }
6656         return 0;
6657 }
6658
6659 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6660                            struct amdgpu_iv_entry *entry)
6661 {
6662         u8 me_id, pipe_id, queue_id;
6663         struct amdgpu_ring *ring;
6664         int i;
6665
6666         me_id = (entry->ring_id & 0x0c) >> 2;
6667         pipe_id = (entry->ring_id & 0x03) >> 0;
6668         queue_id = (entry->ring_id & 0x70) >> 4;
6669
6670         switch (me_id) {
6671         case 0:
6672                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6673                 break;
6674         case 1:
6675         case 2:
6676                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6677                         ring = &adev->gfx.compute_ring[i];
6678                         if (ring->me == me_id && ring->pipe == pipe_id &&
6679                             ring->queue == queue_id)
6680                                 drm_sched_fault(&ring->sched);
6681                 }
6682                 break;
6683         }
6684 }
6685
/*
 * Privileged register fault handler: log the error, then flag a fault on
 * the ring named by @entry. Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6694
/*
 * Privileged instruction fault handler: log the error, then flag a fault
 * on the ring named by @entry. Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6703
/*
 * CP EDC/ECC error interrupt handler: only logs the event.
 * None of the arguments are used. Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* terminate the message so it is not merged with the next log line */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6711
6712 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6713                                   bool from_wq)
6714 {
6715         u32 enc, se_id, sh_id, cu_id;
6716         char type[20];
6717         int sq_edc_source = -1;
6718
6719         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6720         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6721
6722         switch (enc) {
6723                 case 0:
6724                         DRM_INFO("SQ general purpose intr detected:"
6725                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6726                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6727                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6728                                         "wlt %d, thread_trace %d.\n",
6729                                         se_id,
6730                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6731                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6732                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6733                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6734                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6735                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6736                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6737                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6738                                         );
6739                         break;
6740                 case 1:
6741                 case 2:
6742
6743                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6744                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6745
6746                         /*
6747                          * This function can be called either directly from ISR
6748                          * or from BH in which case we can access SQ_EDC_INFO
6749                          * instance
6750                          */
6751                         if (from_wq) {
6752                                 mutex_lock(&adev->grbm_idx_mutex);
6753                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6754
6755                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6756
6757                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6758                                 mutex_unlock(&adev->grbm_idx_mutex);
6759                         }
6760
6761                         if (enc == 1)
6762                                 sprintf(type, "instruction intr");
6763                         else
6764                                 sprintf(type, "EDC/ECC error");
6765
6766                         DRM_INFO(
6767                                 "SQ %s detected: "
6768                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6769                                         "trap %s, sq_ed_info.source %s.\n",
6770                                         type, se_id, sh_id, cu_id,
6771                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6772                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6773                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6774                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6775                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6776                                 );
6777                         break;
6778                 default:
6779                         DRM_ERROR("SQ invalid encoding type\n.");
6780         }
6781 }
6782
6783 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6784 {
6785
6786         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6787         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6788
6789         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6790 }
6791
6792 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6793                            struct amdgpu_irq_src *source,
6794                            struct amdgpu_iv_entry *entry)
6795 {
6796         unsigned ih_data = entry->src_data[0];
6797
6798         /*
6799          * Try to submit work so SQ_EDC_INFO can be accessed from
6800          * BH. If previous work submission hasn't finished yet
6801          * just print whatever info is possible directly from the ISR.
6802          */
6803         if (work_pending(&adev->gfx.sq_work.work)) {
6804                 gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6805         } else {
6806                 adev->gfx.sq_work.ih_data = ih_data;
6807                 schedule_work(&adev->gfx.sq_work.work);
6808         }
6809
6810         return 0;
6811 }
6812
6813 static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6814 {
6815         amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6816         amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6817                           PACKET3_TC_ACTION_ENA |
6818                           PACKET3_SH_KCACHE_ACTION_ENA |
6819                           PACKET3_SH_ICACHE_ACTION_ENA |
6820                           PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6821         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6822         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6823         amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6824 }
6825
6826 static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6827 {
6828         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6829         amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6830                           PACKET3_TC_ACTION_ENA |
6831                           PACKET3_SH_KCACHE_ACTION_ENA |
6832                           PACKET3_SH_ICACHE_ACTION_ENA |
6833                           PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6834         amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
6835         amdgpu_ring_write(ring, 0xff);          /* CP_COHER_SIZE_HI */
6836         amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
6837         amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
6838         amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */
6839 }
6840
6841
6842 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6843 #define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT       0x0000007f
6844 static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6845                                         uint32_t pipe, bool enable)
6846 {
6847         uint32_t val;
6848         uint32_t wcl_cs_reg;
6849
6850         val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6851
6852         switch (pipe) {
6853         case 0:
6854                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6855                 break;
6856         case 1:
6857                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6858                 break;
6859         case 2:
6860                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6861                 break;
6862         case 3:
6863                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6864                 break;
6865         default:
6866                 DRM_DEBUG("invalid pipe %d\n", pipe);
6867                 return;
6868         }
6869
6870         amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6871
6872 }
6873
6874 #define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT      0x07ffffff
6875 static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6876 {
6877         struct amdgpu_device *adev = ring->adev;
6878         uint32_t val;
6879         int i;
6880
6881         /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6882          * number of gfx waves. Setting 5 bit will make sure gfx only gets
6883          * around 25% of gpu resources.
6884          */
6885         val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6886         amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6887
6888         /* Restrict waves for normal/low priority compute queues as well
6889          * to get best QoS for high priority compute jobs.
6890          *
6891          * amdgpu controls only 1st ME(0-3 CS pipes).
6892          */
6893         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6894                 if (i != ring->pipe)
6895                         gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
6896
6897         }
6898
6899 }
6900
6901 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6902         .name = "gfx_v8_0",
6903         .early_init = gfx_v8_0_early_init,
6904         .late_init = gfx_v8_0_late_init,
6905         .sw_init = gfx_v8_0_sw_init,
6906         .sw_fini = gfx_v8_0_sw_fini,
6907         .hw_init = gfx_v8_0_hw_init,
6908         .hw_fini = gfx_v8_0_hw_fini,
6909         .suspend = gfx_v8_0_suspend,
6910         .resume = gfx_v8_0_resume,
6911         .is_idle = gfx_v8_0_is_idle,
6912         .wait_for_idle = gfx_v8_0_wait_for_idle,
6913         .check_soft_reset = gfx_v8_0_check_soft_reset,
6914         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6915         .soft_reset = gfx_v8_0_soft_reset,
6916         .post_soft_reset = gfx_v8_0_post_soft_reset,
6917         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6918         .set_powergating_state = gfx_v8_0_set_powergating_state,
6919         .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6920 };
6921
6922 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6923         .type = AMDGPU_RING_TYPE_GFX,
6924         .align_mask = 0xff,
6925         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6926         .support_64bit_ptrs = false,
6927         .get_rptr = gfx_v8_0_ring_get_rptr,
6928         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6929         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6930         .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6931                 5 +  /* COND_EXEC */
6932                 7 +  /* PIPELINE_SYNC */
6933                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6934                 12 +  /* FENCE for VM_FLUSH */
6935                 20 + /* GDS switch */
6936                 4 + /* double SWITCH_BUFFER,
6937                        the first COND_EXEC jump to the place just
6938                            prior to this double SWITCH_BUFFER  */
6939                 5 + /* COND_EXEC */
6940                 7 +      /*     HDP_flush */
6941                 4 +      /*     VGT_flush */
6942                 14 + /* CE_META */
6943                 31 + /* DE_META */
6944                 3 + /* CNTX_CTRL */
6945                 5 + /* HDP_INVL */
6946                 12 + 12 + /* FENCE x2 */
6947                 2 + /* SWITCH_BUFFER */
6948                 5, /* SURFACE_SYNC */
6949         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6950         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6951         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6952         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6953         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6954         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6955         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6956         .test_ring = gfx_v8_0_ring_test_ring,
6957         .test_ib = gfx_v8_0_ring_test_ib,
6958         .insert_nop = amdgpu_ring_insert_nop,
6959         .pad_ib = amdgpu_ring_generic_pad_ib,
6960         .emit_switch_buffer = gfx_v8_ring_emit_sb,
6961         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6962         .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6963         .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6964         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6965         .soft_recovery = gfx_v8_0_ring_soft_recovery,
6966         .emit_mem_sync = gfx_v8_0_emit_mem_sync,
6967 };
6968
6969 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6970         .type = AMDGPU_RING_TYPE_COMPUTE,
6971         .align_mask = 0xff,
6972         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6973         .support_64bit_ptrs = false,
6974         .get_rptr = gfx_v8_0_ring_get_rptr,
6975         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6976         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6977         .emit_frame_size =
6978                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6979                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6980                 5 + /* hdp_invalidate */
6981                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6982                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6983                 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6984                 7 + /* gfx_v8_0_emit_mem_sync_compute */
6985                 5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6986                 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6987         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6988         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6989         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6990         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6991         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6992         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6993         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6994         .test_ring = gfx_v8_0_ring_test_ring,
6995         .test_ib = gfx_v8_0_ring_test_ib,
6996         .insert_nop = amdgpu_ring_insert_nop,
6997         .pad_ib = amdgpu_ring_generic_pad_ib,
6998         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6999         .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
7000         .emit_wave_limit = gfx_v8_0_emit_wave_limit,
7001 };
7002
7003 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7004         .type = AMDGPU_RING_TYPE_KIQ,
7005         .align_mask = 0xff,
7006         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7007         .support_64bit_ptrs = false,
7008         .get_rptr = gfx_v8_0_ring_get_rptr,
7009         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7010         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7011         .emit_frame_size =
7012                 20 + /* gfx_v8_0_ring_emit_gds_switch */
7013                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7014                 5 + /* hdp_invalidate */
7015                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7016                 17 + /* gfx_v8_0_ring_emit_vm_flush */
7017                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7018         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
7019         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7020         .test_ring = gfx_v8_0_ring_test_ring,
7021         .insert_nop = amdgpu_ring_insert_nop,
7022         .pad_ib = amdgpu_ring_generic_pad_ib,
7023         .emit_rreg = gfx_v8_0_ring_emit_rreg,
7024         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7025 };
7026
7027 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7028 {
7029         int i;
7030
7031         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7032
7033         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7034                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7035
7036         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7037                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7038 }
7039
7040 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7041         .set = gfx_v8_0_set_eop_interrupt_state,
7042         .process = gfx_v8_0_eop_irq,
7043 };
7044
7045 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7046         .set = gfx_v8_0_set_priv_reg_fault_state,
7047         .process = gfx_v8_0_priv_reg_irq,
7048 };
7049
7050 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7051         .set = gfx_v8_0_set_priv_inst_fault_state,
7052         .process = gfx_v8_0_priv_inst_irq,
7053 };
7054
7055 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7056         .set = gfx_v8_0_set_cp_ecc_int_state,
7057         .process = gfx_v8_0_cp_ecc_error_irq,
7058 };
7059
7060 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7061         .set = gfx_v8_0_set_sq_int_state,
7062         .process = gfx_v8_0_sq_irq,
7063 };
7064
7065 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7066 {
7067         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7068         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7069
7070         adev->gfx.priv_reg_irq.num_types = 1;
7071         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7072
7073         adev->gfx.priv_inst_irq.num_types = 1;
7074         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7075
7076         adev->gfx.cp_ecc_error_irq.num_types = 1;
7077         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7078
7079         adev->gfx.sq_irq.num_types = 1;
7080         adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7081 }
7082
7083 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7084 {
7085         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7086 }
7087
7088 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7089 {
7090         /* init asci gds info */
7091         adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7092         adev->gds.gws_size = 64;
7093         adev->gds.oa_size = 16;
7094         adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7095 }
7096
7097 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7098                                                  u32 bitmap)
7099 {
7100         u32 data;
7101
7102         if (!bitmap)
7103                 return;
7104
7105         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7106         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7107
7108         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7109 }
7110
7111 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7112 {
7113         u32 data, mask;
7114
7115         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7116                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7117
7118         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7119
7120         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7121 }
7122
7123 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7124 {
7125         int i, j, k, counter, active_cu_number = 0;
7126         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7127         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7128         unsigned disable_masks[4 * 2];
7129         u32 ao_cu_num;
7130
7131         memset(cu_info, 0, sizeof(*cu_info));
7132
7133         if (adev->flags & AMD_IS_APU)
7134                 ao_cu_num = 2;
7135         else
7136                 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7137
7138         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7139
7140         mutex_lock(&adev->grbm_idx_mutex);
7141         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7142                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7143                         mask = 1;
7144                         ao_bitmap = 0;
7145                         counter = 0;
7146                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7147                         if (i < 4 && j < 2)
7148                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7149                                         adev, disable_masks[i * 2 + j]);
7150                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7151                         cu_info->bitmap[i][j] = bitmap;
7152
7153                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7154                                 if (bitmap & mask) {
7155                                         if (counter < ao_cu_num)
7156                                                 ao_bitmap |= mask;
7157                                         counter ++;
7158                                 }
7159                                 mask <<= 1;
7160                         }
7161                         active_cu_number += counter;
7162                         if (i < 2 && j < 2)
7163                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7164                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7165                 }
7166         }
7167         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7168         mutex_unlock(&adev->grbm_idx_mutex);
7169
7170         cu_info->number = active_cu_number;
7171         cu_info->ao_cu_mask = ao_cu_mask;
7172         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7173         cu_info->max_waves_per_simd = 10;
7174         cu_info->max_scratch_slots_per_cu = 32;
7175         cu_info->wave_front_size = 64;
7176         cu_info->lds_size = 64;
7177 }
7178
7179 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7180 {
7181         .type = AMD_IP_BLOCK_TYPE_GFX,
7182         .major = 8,
7183         .minor = 0,
7184         .rev = 0,
7185         .funcs = &gfx_v8_0_ip_funcs,
7186 };
7187
7188 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7189 {
7190         .type = AMD_IP_BLOCK_TYPE_GFX,
7191         .major = 8,
7192         .minor = 1,
7193         .rev = 0,
7194         .funcs = &gfx_v8_0_ip_funcs,
7195 };
7196
7197 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7198 {
7199         uint64_t ce_payload_addr;
7200         int cnt_ce;
7201         union {
7202                 struct vi_ce_ib_state regular;
7203                 struct vi_ce_ib_state_chained_ib chained;
7204         } ce_payload = {};
7205
7206         if (ring->adev->virt.chained_ib_support) {
7207                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7208                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7209                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7210         } else {
7211                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7212                         offsetof(struct vi_gfx_meta_data, ce_payload);
7213                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7214         }
7215
7216         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7217         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7218                                 WRITE_DATA_DST_SEL(8) |
7219                                 WR_CONFIRM) |
7220                                 WRITE_DATA_CACHE_POLICY(0));
7221         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7222         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7223         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7224 }
7225
7226 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7227 {
7228         uint64_t de_payload_addr, gds_addr, csa_addr;
7229         int cnt_de;
7230         union {
7231                 struct vi_de_ib_state regular;
7232                 struct vi_de_ib_state_chained_ib chained;
7233         } de_payload = {};
7234
7235         csa_addr = amdgpu_csa_vaddr(ring->adev);
7236         gds_addr = csa_addr + 4096;
7237         if (ring->adev->virt.chained_ib_support) {
7238                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7239                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7240                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7241                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7242         } else {
7243                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7244                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7245                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7246                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7247         }
7248
7249         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7250         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7251                                 WRITE_DATA_DST_SEL(8) |
7252                                 WR_CONFIRM) |
7253                                 WRITE_DATA_CACHE_POLICY(0));
7254         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7255         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7256         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7257 }
This page took 0.487835 seconds and 4 git commands to generate.